In [1]:
import numpy as np
import pandas as pd

import torcheval
import logging
import warnings
import operator
import shutil
import torch
from torcheval.metrics.functional import multiclass_f1_score, multiclass_accuracy, multiclass_precision, multiclass_recall
from simpletransformers.classification import ClassificationArgs
from simpletransformers.classification import ClassificationModel

from warnings import simplefilter
from statistics import mean

simplefilter("ignore")
warnings.simplefilter(action='ignore', category=FutureWarning)
    

In [2]:
import os
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"
os.environ['TORCH_USE_CUDA_DSA'] = '1'

In [3]:
def return_typical_regr_args(MAX_SEQ_LEN, hd=0.05, epochs=3, evaluate_during_training=False, update_params=None):
    """ Initialize the typical regr arguements."""
    model_args_reg = ClassificationArgs()
    model_args_reg.num_train_epochs = epochs
    model_args_reg.overwrite_output_dir = True
    model_args_reg.save_steps = -1
    model_args_reg.save_model_every_epoch = False
    model_args_reg.fp16=False
    model_args_reg.regression = False
    model_args_reg.num_labels = 3
    model_args_reg.evaluate_during_training = evaluate_during_training
    model_args_reg.max_seq_length = MAX_SEQ_LEN
    model_args_reg.train_batch_size = 8
    model_args_reg.config = {'hidden_dropout_prob' : hd}
    if update_params:
        model_args_reg.custom_parameter_groups = [{"params" : update_params}]
        model_args_reg.train_custom_parameters_only = True
    return model_args_reg

In [4]:
def create_and_double_finetune(model_name, model_version, train_df, hd = 0.05, default_lr = 0.00004, evaluate_during_training=False, MAX_SEQ_LEN = 128, typical_args=True):
    """ 
    Create and double finetune (HeFit) 
    a model given the train dataframe.
    """

    model = ClassificationModel(model_name, model_version, num_labels=3) # initialize to get named params

    frozen_parameters_size = len(model.get_named_parameters()) - 4 # 4 for the heads layers

    if evaluate_during_training:
        train_df, eval_df = train_test_split(train_df, test_size=0.2)
    
    # get final arguments
    # use this to freeze encoder embeddings and not the classifier head
    # model.get_named_parameters()[frozen_parameters_size:]
    model_args_reg = return_typical_regr_args(MAX_SEQ_LEN,hd=hd, evaluate_during_training=evaluate_during_training, update_params=model.get_named_parameters()[frozen_parameters_size:])
    del model

    # Create a ClassificationModel with frozen embeddings
    model = ClassificationModel(
        model_name,
        model_version,
        num_labels=3,
        args=model_args_reg,
    )
    

    if evaluate_during_training:
        model.train_model(train_df, eval_df=eval_df)
        model = perform_second_stage_finetune(model, train_df, eval_df=eval_df, default_lr=default_lr)
    else:
        model.train_model(train_df)
        model = perform_second_stage_finetune(model, train_df, default_lr=default_lr)
    
    return model

def perform_second_stage_finetune(model, train_df, default_lr, eval_df=None):
    """ 
    Perform the second stage (unfrozen embeddings) 
    of the HeFit finetuning procedure.
    """
    # unfreezes embeddings
    model.args.custom_parameter_groups = [{"params" : model.get_named_parameters()}]
    model.args.train_custom_parameters_only = False 

    # for second stage
    model.args.learning_rate = (default_lr/2) # half lr
    model.args.num_train_epochs = HeFiT_2nd_stage_epochs # double epochs

    
    if eval_df == None: # train again (from now checkpoint) with unfrozen embeddings
        model.train_model(train_df) 
    else:
        model.train_model(train_df, eval_df=eval_df)
    return model

def create_and_single_adapter_finetune(model_name, model_version, train_df, adapters_epochs, hd=0.05, default_lr = 0.00004, MAX_SEQ_LEN = 128, evaluate_during_training=False, typical_args=True):
    """ 
    Create and single finetune using adapters 
    a model given the train dataframe.
    """
    model_args_reg = return_typical_regr_args(MAX_SEQ_LEN, hd=hd, evaluate_during_training=evaluate_during_training)
    model_args_reg.num_train_epochs = adapters_epochs
    
    model = ClassificationModel(
        model_name,
        model_version,
        num_labels=3,
        args=model_args_reg,
    )
    
    #add adapters
    model.model.add_adapter("CLASSIFICATION_ADAPTERS")

    # Activate the adapters
    model.model.train_adapter("CLASSIFICATION_ADAPTERS")

    model.train_model(train_df)

    return model


def create_and_single_finetune(model_name, model_version, train_df, sFiT_epochs, hd=0.05, default_lr = 0.00004, MAX_SEQ_LEN = 128, evaluate_during_training=False, typical_args=True):
    """ 
    Create and single finetune, standard finetune SFIT
    a model given the train dataframe.
    """
    model_args_reg = return_typical_regr_args(MAX_SEQ_LEN, hd=hd, evaluate_during_training=evaluate_during_training)
    model_args_reg.num_train_epochs = sFiT_epochs

    model = ClassificationModel(
        model_name,
        model_version,
        num_labels=3,
        args=model_args_reg,
    )

    model.train_model(train_df)

    return model


In [5]:
def evaluate_model(data, save_name, model):
    """
    evaluates the model With 
    Accuracy, F1, precision, recall (macro)
    saves evaluation in a csv file
    """
    r, _ = model.predict(list(data['text']))
    
    
#     print(multiclass_accuracy(torch.tensor(data['label']), torch.tensor(r), num_classes=3))
#     print(multiclass_f1_score(torch.tensor(data['label']), torch.tensor(r), num_classes=3))
#     print(multiclass_precision(torch.tensor(data['label']), torch.tensor(r), num_classes=3))
#     print(multiclass_recall(torch.tensor(data['label']), torch.tensor(r), num_classes=3))
    
    df = pd.DataFrame()
    df['Accuracy'] = [multiclass_accuracy(torch.tensor(data['label']), torch.tensor(r), num_classes=3)]
    df['F1'] = [multiclass_f1_score(torch.tensor(data['label']), torch.tensor(r), num_classes=3, average='macro')]
    df['Precision'] = [multiclass_precision(torch.tensor(data['label']), torch.tensor(r), num_classes=3, average='macro')]
    df['Recall'] = [multiclass_recall(torch.tensor(data['label']), torch.tensor(r), num_classes=3, average='macro')]
    
    df.to_csv(save_name)


In [6]:
def train_and_evaluate_all_models(test_pairs, model_name, model_version, lr, MAX_SEQ_LEN, hd=0.05, iterations=5, epochs=3, evaluate_during_training=False, training_mode='hefit'):
    """ finetune(or double) models for all affects regression. """
    
    if isinstance(iterations, int):
        for_iter = range(iterations)
    else:
        for_iter = iterations
    
    for test_pair in test_pairs:
        train_df = pd.read_csv(test_pair[1])
        train_df = train_df[['text', 'label']]
        test_df = pd.read_csv(test_pair[2])
        for i in for_iter:
    
            if training_mode=='hefit':
                model = create_and_double_finetune(model_name = model_name, model_version = model_version[1], train_df = train_df, hd=hd, evaluate_during_training=evaluate_during_training, MAX_SEQ_LEN = MAX_SEQ_LEN)
                save_directory = f'/home/user/konstantinou/{test_pair[0]} Results/HeFit'
            elif training_mode=='adapters':
                model = create_and_single_adapter_finetune(model_name = model_name, model_version = model_version[1], train_df = train_df, adapters_epochs=epochs, hd=hd, evaluate_during_training=evaluate_during_training, MAX_SEQ_LEN = MAX_SEQ_LEN)
                save_directory = f'/home/user/konstantinou/{test_pair[0]} Results/{epochs} epochs Adapters'
            else: #else sfit
                model = create_and_single_finetune(model_name = model_name, model_version = model_version[1], train_df = train_df, sFiT_epochs=epochs, hd=hd, evaluate_during_training=evaluate_during_training, MAX_SEQ_LEN = MAX_SEQ_LEN)
                save_directory = f'/home/user/konstantinou/{test_pair[0]} Results/{epochs} epochs SFIT'
            
            
            try:  
                os.mkdir(save_directory)  
            except OSError as error:  
                print('Directory already exists')  
            
            save_name = f'{save_directory}/{model_name}_{i}_res.csv'
            evaluate_model(test_df, save_name, model)
            del model
            torch.cuda.empty_cache()

            # shutil.rmtree('outputs')


In [7]:
def run_sfit_training(epoch_training_list, train_test_pairs, model_name, model_version, default_lr, MAX_SEQ_LEN, iterations=5):
    for sFit_epochs in epoch_training_list:
        train_and_evaluate_all_models(train_test_pairs, model_name, model_version, default_lr, MAX_SEQ_LEN, iterations=iterations, epochs=sFit_epochs, evaluate_during_training=False, training_mode='sfit')
def run_adapter_training(epoch_training_list, train_test_pairs, model_name, model_version, default_lr, MAX_SEQ_LEN, iterations=5):
    for adapter_epochs in epoch_training_list:
        train_and_evaluate_all_models(train_test_pairs, model_name, model_version, default_lr, MAX_SEQ_LEN, iterations=iterations, epochs=adapter_epochs, evaluate_during_training=False, training_mode='adapters')


In [8]:
epoch_training_list = [50]
it = 5

model_name = "roberta"
model_version = ["roberta", "roberta-base"]

# default_lr = 0.00004
default_lr = 4e-5
MAX_SEQ_LEN = 128
HeFiT_2nd_stage_epochs = 6

torch.cuda.is_available()

True

## Twitter Trainning

In [15]:
tr_dir = f'/home/user/konstantinou/Twitter_Data training_3750.csv'
val_dir = f'/home/user/konstantinou/Twitter_Data eval_3750.csv'

train_test_pairs = [
     [f'Twitter_data_3750', tr_dir, val_dir],
 ]

In [16]:
run_sfit_training([3,4,5,6], train_test_pairs, model_name, model_version, default_lr, MAX_SEQ_LEN, 
                  iterations=it)

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.dense.weight']
You should pr

  0%|          | 0/3000 [00:00<?, ?it/s]

Epoch:   0%|          | 0/3 [00:00<?, ?it/s]

Running Epoch 0 of 3:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 1 of 3:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 2 of 3:   0%|          | 0/375 [00:00<?, ?it/s]

  0%|          | 0/750 [00:00<?, ?it/s]

  0%|          | 0/94 [00:00<?, ?it/s]

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.dense.weight']
You should pr

  0%|          | 0/3000 [00:00<?, ?it/s]

Epoch:   0%|          | 0/3 [00:00<?, ?it/s]

Running Epoch 0 of 3:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 1 of 3:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 2 of 3:   0%|          | 0/375 [00:00<?, ?it/s]

Directory already exists


  0%|          | 0/750 [00:00<?, ?it/s]

  0%|          | 0/94 [00:00<?, ?it/s]

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.dense.weight']
You should pr

  0%|          | 0/3000 [00:00<?, ?it/s]

Epoch:   0%|          | 0/3 [00:00<?, ?it/s]

Running Epoch 0 of 3:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 1 of 3:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 2 of 3:   0%|          | 0/375 [00:00<?, ?it/s]

Directory already exists


  0%|          | 0/750 [00:00<?, ?it/s]

  0%|          | 0/94 [00:00<?, ?it/s]

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.dense.weight']
You should pr

  0%|          | 0/3000 [00:00<?, ?it/s]

Epoch:   0%|          | 0/3 [00:00<?, ?it/s]

Running Epoch 0 of 3:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 1 of 3:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 2 of 3:   0%|          | 0/375 [00:00<?, ?it/s]

Directory already exists


  0%|          | 0/750 [00:00<?, ?it/s]

  0%|          | 0/94 [00:00<?, ?it/s]

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.dense.weight']
You should pr

  0%|          | 0/3000 [00:00<?, ?it/s]

Epoch:   0%|          | 0/3 [00:00<?, ?it/s]

Running Epoch 0 of 3:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 1 of 3:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 2 of 3:   0%|          | 0/375 [00:00<?, ?it/s]

Directory already exists


  0%|          | 0/750 [00:00<?, ?it/s]

  0%|          | 0/94 [00:00<?, ?it/s]

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.dense.weight']
You should pr

  0%|          | 0/3000 [00:00<?, ?it/s]

Epoch:   0%|          | 0/4 [00:00<?, ?it/s]

Running Epoch 0 of 4:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 1 of 4:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 2 of 4:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 3 of 4:   0%|          | 0/375 [00:00<?, ?it/s]

  0%|          | 0/750 [00:00<?, ?it/s]

  0%|          | 0/94 [00:00<?, ?it/s]

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.dense.weight']
You should pr

  0%|          | 0/3000 [00:00<?, ?it/s]

Epoch:   0%|          | 0/4 [00:00<?, ?it/s]

Running Epoch 0 of 4:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 1 of 4:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 2 of 4:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 3 of 4:   0%|          | 0/375 [00:00<?, ?it/s]

Directory already exists


  0%|          | 0/750 [00:00<?, ?it/s]

  0%|          | 0/94 [00:00<?, ?it/s]

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.dense.weight']
You should pr

  0%|          | 0/3000 [00:00<?, ?it/s]

Epoch:   0%|          | 0/4 [00:00<?, ?it/s]

Running Epoch 0 of 4:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 1 of 4:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 2 of 4:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 3 of 4:   0%|          | 0/375 [00:00<?, ?it/s]

Directory already exists


  0%|          | 0/750 [00:00<?, ?it/s]

  0%|          | 0/94 [00:00<?, ?it/s]

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.dense.weight']
You should pr

  0%|          | 0/3000 [00:00<?, ?it/s]

Epoch:   0%|          | 0/4 [00:00<?, ?it/s]

Running Epoch 0 of 4:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 1 of 4:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 2 of 4:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 3 of 4:   0%|          | 0/375 [00:00<?, ?it/s]

Directory already exists


  0%|          | 0/750 [00:00<?, ?it/s]

  0%|          | 0/94 [00:00<?, ?it/s]

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.dense.weight']
You should pr

  0%|          | 0/3000 [00:00<?, ?it/s]

Epoch:   0%|          | 0/4 [00:00<?, ?it/s]

Running Epoch 0 of 4:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 1 of 4:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 2 of 4:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 3 of 4:   0%|          | 0/375 [00:00<?, ?it/s]

Directory already exists


  0%|          | 0/750 [00:00<?, ?it/s]

  0%|          | 0/94 [00:00<?, ?it/s]

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.dense.weight']
You should pr

  0%|          | 0/3000 [00:00<?, ?it/s]

Epoch:   0%|          | 0/5 [00:00<?, ?it/s]

Running Epoch 0 of 5:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 1 of 5:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 2 of 5:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 3 of 5:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 4 of 5:   0%|          | 0/375 [00:00<?, ?it/s]

  0%|          | 0/750 [00:00<?, ?it/s]

  0%|          | 0/94 [00:00<?, ?it/s]

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.dense.weight']
You should pr

  0%|          | 0/3000 [00:00<?, ?it/s]

Epoch:   0%|          | 0/5 [00:00<?, ?it/s]

Running Epoch 0 of 5:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 1 of 5:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 2 of 5:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 3 of 5:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 4 of 5:   0%|          | 0/375 [00:00<?, ?it/s]

Directory already exists


  0%|          | 0/750 [00:00<?, ?it/s]

  0%|          | 0/94 [00:00<?, ?it/s]

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.dense.weight']
You should pr

  0%|          | 0/3000 [00:00<?, ?it/s]

Epoch:   0%|          | 0/5 [00:00<?, ?it/s]

Running Epoch 0 of 5:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 1 of 5:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 2 of 5:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 3 of 5:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 4 of 5:   0%|          | 0/375 [00:00<?, ?it/s]

Directory already exists


  0%|          | 0/750 [00:00<?, ?it/s]

  0%|          | 0/94 [00:00<?, ?it/s]

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.dense.weight']
You should pr

  0%|          | 0/3000 [00:00<?, ?it/s]

Epoch:   0%|          | 0/5 [00:00<?, ?it/s]

Running Epoch 0 of 5:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 1 of 5:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 2 of 5:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 3 of 5:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 4 of 5:   0%|          | 0/375 [00:00<?, ?it/s]

Directory already exists


  0%|          | 0/750 [00:00<?, ?it/s]

  0%|          | 0/94 [00:00<?, ?it/s]

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.dense.weight']
You should pr

  0%|          | 0/3000 [00:00<?, ?it/s]

Epoch:   0%|          | 0/5 [00:00<?, ?it/s]

Running Epoch 0 of 5:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 1 of 5:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 2 of 5:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 3 of 5:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 4 of 5:   0%|          | 0/375 [00:00<?, ?it/s]

Directory already exists


  0%|          | 0/750 [00:00<?, ?it/s]

  0%|          | 0/94 [00:00<?, ?it/s]

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.dense.weight']
You should pr

  0%|          | 0/3000 [00:00<?, ?it/s]

Epoch:   0%|          | 0/6 [00:00<?, ?it/s]

Running Epoch 0 of 6:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 1 of 6:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 2 of 6:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 3 of 6:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 4 of 6:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 5 of 6:   0%|          | 0/375 [00:00<?, ?it/s]

  0%|          | 0/750 [00:00<?, ?it/s]

  0%|          | 0/94 [00:00<?, ?it/s]

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.dense.weight']
You should pr

  0%|          | 0/3000 [00:00<?, ?it/s]

Epoch:   0%|          | 0/6 [00:00<?, ?it/s]

Running Epoch 0 of 6:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 1 of 6:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 2 of 6:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 3 of 6:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 4 of 6:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 5 of 6:   0%|          | 0/375 [00:00<?, ?it/s]

Directory already exists


  0%|          | 0/750 [00:00<?, ?it/s]

  0%|          | 0/94 [00:00<?, ?it/s]

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.dense.weight']
You should pr

  0%|          | 0/3000 [00:00<?, ?it/s]

Epoch:   0%|          | 0/6 [00:00<?, ?it/s]

Running Epoch 0 of 6:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 1 of 6:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 2 of 6:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 3 of 6:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 4 of 6:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 5 of 6:   0%|          | 0/375 [00:00<?, ?it/s]

Directory already exists


  0%|          | 0/750 [00:00<?, ?it/s]

  0%|          | 0/94 [00:00<?, ?it/s]

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.dense.weight']
You should pr

  0%|          | 0/3000 [00:00<?, ?it/s]

Epoch:   0%|          | 0/6 [00:00<?, ?it/s]

Running Epoch 0 of 6:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 1 of 6:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 2 of 6:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 3 of 6:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 4 of 6:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 5 of 6:   0%|          | 0/375 [00:00<?, ?it/s]

Directory already exists


  0%|          | 0/750 [00:00<?, ?it/s]

  0%|          | 0/94 [00:00<?, ?it/s]

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.dense.weight']
You should pr

  0%|          | 0/3000 [00:00<?, ?it/s]

Epoch:   0%|          | 0/6 [00:00<?, ?it/s]

Running Epoch 0 of 6:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 1 of 6:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 2 of 6:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 3 of 6:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 4 of 6:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 5 of 6:   0%|          | 0/375 [00:00<?, ?it/s]

Directory already exists


  0%|          | 0/750 [00:00<?, ?it/s]

  0%|          | 0/94 [00:00<?, ?it/s]

In [17]:
run_adapter_training([45, 50, 55, 60], train_test_pairs, model_name, model_version, default_lr, MAX_SEQ_LEN, 
                  iterations=it)

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.dense.weight']
You should pr

  0%|          | 0/3000 [00:00<?, ?it/s]

Epoch:   0%|          | 0/45 [00:00<?, ?it/s]

Running Epoch 0 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 1 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 2 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 3 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 4 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 5 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 6 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 7 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 8 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 9 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 10 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 11 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 12 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 13 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 14 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 15 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 16 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 17 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 18 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 19 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 20 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 21 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 22 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 23 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 24 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 25 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 26 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 27 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 28 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 29 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 30 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 31 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 32 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 33 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 34 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 35 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 36 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 37 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 38 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 39 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 40 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 41 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 42 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 43 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 44 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

  0%|          | 0/750 [00:00<?, ?it/s]

  0%|          | 0/94 [00:00<?, ?it/s]

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.dense.weight']
You should pr

  0%|          | 0/3000 [00:00<?, ?it/s]

Epoch:   0%|          | 0/45 [00:00<?, ?it/s]

Running Epoch 0 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 1 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 2 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 3 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 4 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 5 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 6 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 7 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 8 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 9 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 10 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 11 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 12 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 13 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 14 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 15 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 16 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 17 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 18 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 19 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 20 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 21 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 22 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 23 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 24 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 25 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 26 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 27 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 28 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 29 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 30 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 31 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 32 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 33 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 34 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 35 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 36 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 37 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 38 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 39 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 40 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 41 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 42 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 43 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 44 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Directory already exists


  0%|          | 0/750 [00:00<?, ?it/s]

  0%|          | 0/94 [00:00<?, ?it/s]

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.dense.weight']
You should pr

  0%|          | 0/3000 [00:00<?, ?it/s]

Epoch:   0%|          | 0/45 [00:00<?, ?it/s]

Running Epoch 0 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 1 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 2 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 3 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 4 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 5 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 6 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 7 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 8 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 9 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 10 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 11 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 12 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 13 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 14 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 15 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 16 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 17 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 18 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 19 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 20 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 21 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 22 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 23 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 24 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 25 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 26 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 27 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 28 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 29 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 30 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 31 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 32 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 33 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 34 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 35 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Running Epoch 7 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 8 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Running Epoch 24 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 25 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Running Epoch 42 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 43 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Running Epoch 14 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 15 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Running Epoch 30 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 31 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 32 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Directory already exists


  0%|          | 0/750 [00:00<?, ?it/s]

  0%|          | 0/94 [00:00<?, ?it/s]

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.dense.weight']
You should pr

  0%|          | 0/3000 [00:00<?, ?it/s]

Epoch:   0%|          | 0/50 [00:00<?, ?it/s]

Running Epoch 0 of 50:   0%|          | 0/375 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Running Epoch 4 of 50:   0%|          | 0/375 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Running Epoch 16 of 50:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 17 of 50:   0%|          | 0/375 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Running Epoch 21 of 50:   0%|          | 0/375 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Running Epoch 33 of 50:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 34 of 50:   0%|          | 0/375 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Running Epoch 38 of 50:   0%|          | 0/375 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Running Epoch 2 of 50:   0%|          | 0/375 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Running Epoch 6 of 50:   0%|          | 0/375 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Running Epoch 18 of 50:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 19 of 50:   0%|          | 0/375 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Running Epoch 23 of 50:   0%|          | 0/375 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Running Epoch 35 of 50:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 36 of 50:   0%|          | 0/375 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Running Epoch 3 of 50:   0%|          | 0/375 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Running Epoch 7 of 50:   0%|          | 0/375 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Running Epoch 19 of 50:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 20 of 50:   0%|          | 0/375 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Running Epoch 36 of 50:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 37 of 50:   0%|          | 0/375 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Running Epoch 41 of 50:   0%|          | 0/375 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Running Epoch 1 of 50:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 2 of 50:   0%|          | 0/375 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Running Epoch 8 of 50:   0%|          | 0/375 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Running Epoch 17 of 50:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 18 of 50:   0%|          | 0/375 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Running Epoch 25 of 50:   0%|          | 0/375 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Running Epoch 34 of 50:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 35 of 50:   0%|          | 0/375 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Running Epoch 3 of 50:   0%|          | 0/375 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Running Epoch 10 of 50:   0%|          | 0/375 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Running Epoch 19 of 50:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 20 of 50:   0%|          | 0/375 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Running Epoch 27 of 50:   0%|          | 0/375 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Running Epoch 36 of 50:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 37 of 50:   0%|          | 0/375 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Running Epoch 44 of 50:   0%|          | 0/375 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Running Epoch 4 of 55:   0%|          | 0/375 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Running Epoch 11 of 55:   0%|          | 0/375 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Running Epoch 20 of 55:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 21 of 55:   0%|          | 0/375 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Running Epoch 28 of 55:   0%|          | 0/375 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Running Epoch 37 of 55:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 38 of 55:   0%|          | 0/375 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Running Epoch 45 of 55:   0%|          | 0/375 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Running Epoch 1 of 55:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 2 of 55:   0%|          | 0/375 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Running Epoch 8 of 55:   0%|          | 0/375 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Running Epoch 17 of 55:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 18 of 55:   0%|          | 0/375 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Running Epoch 25 of 55:   0%|          | 0/375 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Running Epoch 34 of 55:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 35 of 55:   0%|          | 0/375 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Running Epoch 38 of 55:   0%|          | 0/375 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Running Epoch 53 of 55:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 54 of 55:   0%|          | 0/375 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Running Epoch 2 of 55:   0%|          | 0/375 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Running Epoch 15 of 55:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 16 of 55:   0%|          | 0/375 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Running Epoch 32 of 55:   0%|          | 0/375 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Running Epoch 36 of 55:   0%|          | 0/375 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Running Epoch 50 of 55:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 51 of 55:   0%|          | 0/375 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Directory already exists


  0%|          | 0/750 [00:00<?, ?it/s]

  0%|          | 0/94 [00:00<?, ?it/s]

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.dense.weight']
You should pr

Running Epoch 13 of 55:   0%|          | 0/375 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Running Epoch 16 of 55:   0%|          | 0/375 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Running Epoch 29 of 55:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 30 of 55:   0%|          | 0/375 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Running Epoch 33 of 55:   0%|          | 0/375 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Running Epoch 37 of 55:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 38 of 55:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 39 of 55:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 40 of 55:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 41 of 55:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 42 of 55:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 43 of 55:   0%|          | 0/375 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Running Epoch 4 of 55:   0%|          | 0/375 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Running Epoch 7 of 55:   0%|          | 0/375 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Running Epoch 21 of 55:   0%|          | 0/375 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Running Epoch 24 of 55:   0%|          | 0/375 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Running Epoch 39 of 55:   0%|          | 0/375 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Running Epoch 42 of 55:   0%|          | 0/375 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Running Epoch 3 of 60:   0%|          | 0/375 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Running Epoch 6 of 60:   0%|          | 0/375 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Running Epoch 20 of 60:   0%|          | 0/375 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Running Epoch 23 of 60:   0%|          | 0/375 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Running Epoch 37 of 60:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 38 of 60:   0%|          | 0/375 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Running Epoch 40 of 60:   0%|          | 0/375 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Running Epoch 54 of 60:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 55 of 60:   0%|          | 0/375 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



  0%|          | 0/750 [00:00<?, ?it/s]

  0%|          | 0/94 [00:00<?, ?it/s]

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.dense.weight']
You should pr

  0%|          | 0/3000 [00:00<?, ?it/s]

Epoch:   0%|          | 0/60 [00:00<?, ?it/s]

Running Epoch 0 of 60:   0%|          | 0/375 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Running Epoch 12 of 60:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 13 of 60:   0%|          | 0/375 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Running Epoch 17 of 60:   0%|          | 0/375 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Running Epoch 29 of 60:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 30 of 60:   0%|          | 0/375 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Running Epoch 34 of 60:   0%|          | 0/375 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Running Epoch 47 of 60:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 48 of 60:   0%|          | 0/375 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Running Epoch 53 of 60:   0%|          | 0/375 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Running Epoch 6 of 60:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 7 of 60:   0%|          | 0/375 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Running Epoch 11 of 60:   0%|          | 0/375 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Running Epoch 23 of 60:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 24 of 60:   0%|          | 0/375 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Running Epoch 28 of 60:   0%|          | 0/375 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Running Epoch 41 of 60:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 42 of 60:   0%|          | 0/375 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Running Epoch 47 of 60:   0%|          | 0/375 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Running Epoch 58 of 60:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 59 of 60:   0%|          | 0/375 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Running Epoch 4 of 60:   0%|          | 0/375 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Running Epoch 16 of 60:   0%|          | 0/375 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Running Epoch 21 of 60:   0%|          | 0/375 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Running Epoch 33 of 60:   0%|          | 0/375 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Running Epoch 38 of 60:   0%|          | 0/375 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Running Epoch 51 of 60:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 52 of 60:   0%|          | 0/375 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Running Epoch 58 of 60:   0%|          | 0/375 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Running Epoch 10 of 60:   0%|          | 0/375 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Running Epoch 15 of 60:   0%|          | 0/375 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Running Epoch 27 of 60:   0%|          | 0/375 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Running Epoch 32 of 60:   0%|          | 0/375 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Running Epoch 45 of 60:   0%|          | 0/375 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Running Epoch 50 of 60:   0%|          | 0/375 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



In [18]:
#hefit
train_and_evaluate_all_models(train_test_pairs, model_name, model_version, default_lr, MAX_SEQ_LEN, iterations=it, epochs=None, evaluate_during_training=False, training_mode='hefit')


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.dense.weight']
You should pr

  0%|          | 0/3000 [00:00<?, ?it/s]

Epoch:   0%|          | 0/3 [00:00<?, ?it/s]

Running Epoch 0 of 3:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 1 of 3:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 2 of 3:   0%|          | 0/375 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Running Epoch 5 of 6:   0%|          | 0/375 [00:00<?, ?it/s]

Directory already exists


  0%|          | 0/750 [00:00<?, ?it/s]

  0%|          | 0/94 [00:00<?, ?it/s]

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.dense.weight']
You should pr

  0%|          | 0/3000 [00:00<?, ?it/s]

Epoch:   0%|          | 0/3 [00:00<?, ?it/s]

Running Epoch 0 of 3:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 1 of 3:   0%|          | 0/375 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Running Epoch 4 of 6:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 5 of 6:   0%|          | 0/375 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



## Reddit Trainning

In [22]:
tr_dir = f'Reddit_Data training_3750.csv'
val_dir = f'Reddit_Data eval_3750.csv'

train_test_pairs = [
     [f'Reddit_data_3750', tr_dir, val_dir],
 ]

MAX_SEQ_LEN = 256


In [23]:
run_sfit_training([3,4,5,6], train_test_pairs, model_name, model_version, default_lr, MAX_SEQ_LEN, 
                  iterations=it)

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.dense.weight']
You should pr

  0%|          | 0/3000 [00:00<?, ?it/s]

Epoch:   0%|          | 0/3 [00:00<?, ?it/s]

Running Epoch 0 of 3:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 1 of 3:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 2 of 3:   0%|          | 0/375 [00:00<?, ?it/s]

Directory already exists


  0%|          | 0/750 [00:00<?, ?it/s]

  0%|          | 0/94 [00:00<?, ?it/s]

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.dense.weight']
You should pr

  0%|          | 0/3000 [00:00<?, ?it/s]

Epoch:   0%|          | 0/3 [00:00<?, ?it/s]

Running Epoch 0 of 3:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 1 of 3:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 2 of 3:   0%|          | 0/375 [00:00<?, ?it/s]

Directory already exists


  0%|          | 0/750 [00:00<?, ?it/s]

  0%|          | 0/94 [00:00<?, ?it/s]

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.dense.weight']
You should pr

  0%|          | 0/3000 [00:00<?, ?it/s]

Epoch:   0%|          | 0/3 [00:00<?, ?it/s]

Running Epoch 0 of 3:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 1 of 3:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 2 of 3:   0%|          | 0/375 [00:00<?, ?it/s]

Directory already exists


  0%|          | 0/750 [00:00<?, ?it/s]

  0%|          | 0/94 [00:00<?, ?it/s]

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.dense.weight']
You should pr

  0%|          | 0/3000 [00:00<?, ?it/s]

Epoch:   0%|          | 0/3 [00:00<?, ?it/s]

Running Epoch 0 of 3:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 1 of 3:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 2 of 3:   0%|          | 0/375 [00:00<?, ?it/s]

Directory already exists


  0%|          | 0/750 [00:00<?, ?it/s]

  0%|          | 0/94 [00:00<?, ?it/s]

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.dense.weight']
You should pr

  0%|          | 0/3000 [00:00<?, ?it/s]

Epoch:   0%|          | 0/3 [00:00<?, ?it/s]

Running Epoch 0 of 3:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 1 of 3:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 2 of 3:   0%|          | 0/375 [00:00<?, ?it/s]

Directory already exists


  0%|          | 0/750 [00:00<?, ?it/s]

  0%|          | 0/94 [00:00<?, ?it/s]

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.dense.weight']
You should pr

  0%|          | 0/3000 [00:00<?, ?it/s]

Epoch:   0%|          | 0/4 [00:00<?, ?it/s]

Running Epoch 0 of 4:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 1 of 4:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 2 of 4:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 3 of 4:   0%|          | 0/375 [00:00<?, ?it/s]

  0%|          | 0/750 [00:00<?, ?it/s]

  0%|          | 0/94 [00:00<?, ?it/s]

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.dense.weight']
You should pr

  0%|          | 0/3000 [00:00<?, ?it/s]

Epoch:   0%|          | 0/4 [00:00<?, ?it/s]

Running Epoch 0 of 4:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 1 of 4:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 2 of 4:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 3 of 4:   0%|          | 0/375 [00:00<?, ?it/s]

Directory already exists


  0%|          | 0/750 [00:00<?, ?it/s]

  0%|          | 0/94 [00:00<?, ?it/s]

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.dense.weight']
You should pr

  0%|          | 0/3000 [00:00<?, ?it/s]

Epoch:   0%|          | 0/4 [00:00<?, ?it/s]

Running Epoch 0 of 4:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 1 of 4:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 2 of 4:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 3 of 4:   0%|          | 0/375 [00:00<?, ?it/s]

Directory already exists


  0%|          | 0/750 [00:00<?, ?it/s]

  0%|          | 0/94 [00:00<?, ?it/s]

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.dense.weight']
You should pr

  0%|          | 0/3000 [00:00<?, ?it/s]

Epoch:   0%|          | 0/4 [00:00<?, ?it/s]

Running Epoch 0 of 4:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 1 of 4:   0%|          | 0/375 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Running Epoch 2 of 4:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 3 of 4:   0%|          | 0/375 [00:00<?, ?it/s]

Directory already exists


  0%|          | 0/750 [00:00<?, ?it/s]

  0%|          | 0/94 [00:00<?, ?it/s]

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.dense.weight']
You should pr

  0%|          | 0/3000 [00:00<?, ?it/s]

Epoch:   0%|          | 0/5 [00:00<?, ?it/s]

Running Epoch 0 of 5:   0%|          | 0/375 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Running Epoch 1 of 5:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 2 of 5:   0%|          | 0/375 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Running Epoch 3 of 6:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 4 of 6:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 5 of 6:   0%|          | 0/375 [00:00<?, ?it/s]

  0%|          | 0/750 [00:00<?, ?it/s]

  0%|          | 0/94 [00:00<?, ?it/s]

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.dense.weight']
You should pr

  0%|          | 0/3000 [00:00<?, ?it/s]

Epoch:   0%|          | 0/6 [00:00<?, ?it/s]

Running Epoch 0 of 6:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 1 of 6:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 2 of 6:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 3 of 6:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 4 of 6:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 5 of 6:   0%|          | 0/375 [00:00<?, ?it/s]

Directory already exists


  0%|          | 0/750 [00:00<?, ?it/s]

  0%|          | 0/94 [00:00<?, ?it/s]

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.dense.weight']
You should pr

  0%|          | 0/3000 [00:00<?, ?it/s]

Epoch:   0%|          | 0/6 [00:00<?, ?it/s]

Running Epoch 0 of 6:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 1 of 6:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 2 of 6:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 3 of 6:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 4 of 6:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 5 of 6:   0%|          | 0/375 [00:00<?, ?it/s]

Directory already exists


  0%|          | 0/750 [00:00<?, ?it/s]

  0%|          | 0/94 [00:00<?, ?it/s]

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.dense.weight']
You should pr

  0%|          | 0/3000 [00:00<?, ?it/s]

Epoch:   0%|          | 0/6 [00:00<?, ?it/s]

Running Epoch 0 of 6:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 1 of 6:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 2 of 6:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 3 of 6:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 4 of 6:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 5 of 6:   0%|          | 0/375 [00:00<?, ?it/s]

Directory already exists


  0%|          | 0/750 [00:00<?, ?it/s]

  0%|          | 0/94 [00:00<?, ?it/s]

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.dense.weight']
You should pr

  0%|          | 0/3000 [00:00<?, ?it/s]

Epoch:   0%|          | 0/6 [00:00<?, ?it/s]

Running Epoch 0 of 6:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 1 of 6:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 2 of 6:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 3 of 6:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 4 of 6:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 5 of 6:   0%|          | 0/375 [00:00<?, ?it/s]

Directory already exists


  0%|          | 0/750 [00:00<?, ?it/s]

  0%|          | 0/94 [00:00<?, ?it/s]

In [None]:
run_adapter_training([45, 50, 55, 60], train_test_pairs, model_name, model_version, default_lr, MAX_SEQ_LEN, 
                     iterations=it)

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.dense.weight']
You should pr

  0%|          | 0/3000 [00:00<?, ?it/s]

Epoch:   0%|          | 0/45 [00:00<?, ?it/s]

Running Epoch 0 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 1 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 2 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 3 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 4 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 5 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 6 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 7 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 8 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 9 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 10 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 11 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 12 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 13 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 14 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 15 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 16 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 17 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 18 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 19 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 20 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 21 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 22 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 23 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 24 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 25 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 26 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 27 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 28 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 29 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 30 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 31 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 32 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 33 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 34 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 35 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 36 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 37 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 38 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 39 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 40 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 41 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 42 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 43 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 44 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

  0%|          | 0/750 [00:00<?, ?it/s]

  0%|          | 0/94 [00:00<?, ?it/s]

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.dense.weight']
You should pr

  0%|          | 0/3000 [00:00<?, ?it/s]

Epoch:   0%|          | 0/45 [00:00<?, ?it/s]

Running Epoch 0 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 1 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 2 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 3 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 4 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 5 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 6 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 7 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 8 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 9 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 10 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 11 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 12 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 13 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 14 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 15 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 16 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 17 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 18 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 19 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 20 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 21 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 22 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 23 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 24 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 25 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 26 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 27 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 28 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 29 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 30 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 31 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 32 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 33 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 34 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 35 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 36 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 37 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 38 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 39 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 40 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 41 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 42 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 43 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 44 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Directory already exists


  0%|          | 0/750 [00:00<?, ?it/s]

  0%|          | 0/94 [00:00<?, ?it/s]

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.dense.weight']
You should pr

  0%|          | 0/3000 [00:00<?, ?it/s]

Epoch:   0%|          | 0/45 [00:00<?, ?it/s]

Running Epoch 0 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 1 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 2 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 3 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 4 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 5 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 6 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 7 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 8 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 9 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 10 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 11 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 12 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 13 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 14 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 15 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 16 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 17 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 18 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 19 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 20 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 21 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 22 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 23 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 24 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 25 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 26 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 27 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 28 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 29 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 30 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 31 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 32 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 33 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 34 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 35 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 36 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 37 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 38 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 39 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 40 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 41 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 42 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 43 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 44 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Directory already exists


  0%|          | 0/750 [00:00<?, ?it/s]

  0%|          | 0/94 [00:00<?, ?it/s]

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.dense.weight']
You should pr

  0%|          | 0/3000 [00:00<?, ?it/s]

Epoch:   0%|          | 0/45 [00:00<?, ?it/s]

Running Epoch 0 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 1 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 2 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 3 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 4 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 5 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 6 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 7 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 8 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 9 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 10 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 11 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 12 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 13 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 14 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 15 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 16 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 17 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 18 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 19 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 20 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 21 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 22 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 23 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 24 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 25 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 26 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 27 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 28 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 29 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 30 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 31 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 32 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 33 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 34 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 35 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 36 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 37 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 38 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 39 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 40 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 41 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 42 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 43 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 44 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Directory already exists


  0%|          | 0/750 [00:00<?, ?it/s]

  0%|          | 0/94 [00:00<?, ?it/s]

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.dense.weight']
You should pr

  0%|          | 0/3000 [00:00<?, ?it/s]

Epoch:   0%|          | 0/45 [00:00<?, ?it/s]

Running Epoch 0 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 1 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 2 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 3 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 4 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 5 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 6 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 7 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 8 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 9 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 10 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 11 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 12 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 13 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 14 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 15 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 16 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 17 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 18 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 19 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 20 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 21 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 22 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 23 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 24 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 25 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 26 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 27 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 28 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 29 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 30 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 31 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 32 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 33 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 34 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 35 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 36 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 37 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 38 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 39 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 40 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 41 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 42 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 43 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 44 of 45:   0%|          | 0/375 [00:00<?, ?it/s]

Directory already exists


  0%|          | 0/750 [00:00<?, ?it/s]

  0%|          | 0/94 [00:00<?, ?it/s]

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.dense.weight']
You should pr

  0%|          | 0/3000 [00:00<?, ?it/s]

Epoch:   0%|          | 0/50 [00:00<?, ?it/s]

Running Epoch 0 of 50:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 1 of 50:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 2 of 50:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 3 of 50:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 4 of 50:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 5 of 50:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 6 of 50:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 7 of 50:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 8 of 50:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 9 of 50:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 10 of 50:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 11 of 50:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 12 of 50:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 13 of 50:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 14 of 50:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 15 of 50:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 16 of 50:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 17 of 50:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 18 of 50:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 19 of 50:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 20 of 50:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 21 of 50:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 22 of 50:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 23 of 50:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 24 of 50:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 25 of 50:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 26 of 50:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 27 of 50:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 28 of 50:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 29 of 50:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 30 of 50:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 31 of 50:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 32 of 50:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 33 of 50:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 34 of 50:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 35 of 50:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 36 of 50:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 37 of 50:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 38 of 50:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 39 of 50:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 40 of 50:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 41 of 50:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 42 of 50:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 43 of 50:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 44 of 50:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 45 of 50:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 46 of 50:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 47 of 50:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 48 of 50:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 49 of 50:   0%|          | 0/375 [00:00<?, ?it/s]

  0%|          | 0/750 [00:00<?, ?it/s]

  0%|          | 0/94 [00:00<?, ?it/s]

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.dense.weight']
You should pr

  0%|          | 0/3000 [00:00<?, ?it/s]

Epoch:   0%|          | 0/50 [00:00<?, ?it/s]

Running Epoch 0 of 50:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 1 of 50:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 2 of 50:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 3 of 50:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 4 of 50:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 5 of 50:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 6 of 50:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 7 of 50:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 8 of 50:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 9 of 50:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 10 of 50:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 11 of 50:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 12 of 50:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 13 of 50:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 14 of 50:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 15 of 50:   0%|          | 0/375 [00:00<?, ?it/s]

Running Epoch 16 of 50:   0%|          | 0/375 [00:00<?, ?it/s]

In [None]:
#hefit
train_and_evaluate_all_models(train_test_pairs, model_name, model_version, default_lr, MAX_SEQ_LEN, iterations=it, epochs=None, evaluate_during_training=False, training_mode='hefit')
