In [1]:
#Loading Libraries
# %conda install pytorch torchvision torchaudio cudatoolkit=11.3 -c pytorch
# %pip install -U adapter-transformers
# %conda install -y -c conda-forge tensorboard
# %pip install optuna
# %pip install tqdm
# from tqdm.notebook import tqdm

In [1]:
# Loading dataset
from datasets import load_dataset

# dataset_name = 'nsusemiehl/SciERC'
dataset_name = 'zapsdcn/citation_intent'

dataset = load_dataset(dataset_name)
dataset_name = 'citation_intent'

print(dataset.num_rows)

Using custom data configuration zapsdcn--citation_intent-0b0f6658161cc990
Reusing dataset json (C:\Users\The Doctor\.cache\huggingface\datasets\json\zapsdcn--citation_intent-0b0f6658161cc990\0.0.0\ac0ca5f5289a6cf108e706efcf040422dbbfa8e658dee6a819f20d76bb84d26b)
100%|██████████| 3/3 [00:00<00:00, 499.60it/s]

{'train': 1688, 'test': 139, 'validation': 114}





In [2]:
dataset['train'][255]

{'text': 'There have been several efforts aimed at developing a domain-independent method for generating responses from a frame representation of user requests ( Bobrow et al. , 1977 ; Chu-Carroll , 1999 ) .',
 'label': 'Future',
 'metadata': {}}

This block creates dataset for pretraining

In [3]:
from transformers import RobertaTokenizer

tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
# Tokenize the set for the transformer
def encode_batch_pretraining(batch):
    """Encodes a batch of input data using the model tokenizer."""
    return tokenizer(batch["text"], truncation=True, padding="max_length", max_length=128)

# Encode the input data
# NOTE: num_proc does not seem to work, for some reason it can't find the tokenizer
print(dataset['train'].column_names)
dataset_pretraining = dataset.map(encode_batch_pretraining, batched=True, remove_columns=dataset['train'].column_names,)

# We make the labels the same as the input as this is language learning 
def add_labels(examples):
    examples["labels"] = examples["input_ids"].copy()
    return examples
  
dataset_pretraining = dataset_pretraining.map(add_labels, batched=True)
dataset_pretraining.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])



['text', 'label', 'metadata']


Loading cached processed dataset at C:\Users\The Doctor\.cache\huggingface\datasets\json\zapsdcn--citation_intent-0b0f6658161cc990\0.0.0\ac0ca5f5289a6cf108e706efcf040422dbbfa8e658dee6a819f20d76bb84d26b\cache-69c4b714d94b638b.arrow
Loading cached processed dataset at C:\Users\The Doctor\.cache\huggingface\datasets\json\zapsdcn--citation_intent-0b0f6658161cc990\0.0.0\ac0ca5f5289a6cf108e706efcf040422dbbfa8e658dee6a819f20d76bb84d26b\cache-725c9048b708febd.arrow
Loading cached processed dataset at C:\Users\The Doctor\.cache\huggingface\datasets\json\zapsdcn--citation_intent-0b0f6658161cc990\0.0.0\ac0ca5f5289a6cf108e706efcf040422dbbfa8e658dee6a819f20d76bb84d26b\cache-d75dc45ad874bb1c.arrow
Loading cached processed dataset at C:\Users\The Doctor\.cache\huggingface\datasets\json\zapsdcn--citation_intent-0b0f6658161cc990\0.0.0\ac0ca5f5289a6cf108e706efcf040422dbbfa8e658dee6a819f20d76bb84d26b\cache-c2962fc358ac477f.arrow
Loading cached processed dataset at C:\Users\The Doctor\.cache\huggingface\d

In [4]:
# Collater adds padding in the form of EOS tokens, makes data augmentations of random masking ('mlm_probability)
from transformers import DataCollatorForLanguageModeling

tokenizer.pad_token = tokenizer.eos_token
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm_probability=0.15)

Here we are creating the dataset for task finetuning

In [5]:
# Finding the number of labels
import numpy as np
labels = np.unique(np.array(dataset['train']['label']))
num_of_labels = labels.size

print(labels)
print(num_of_labels)

['Background' 'CompareOrContrast' 'Extends' 'Future' 'Motivation' 'Uses']
6


In [6]:
# encoding the labels
def encode_labels(dataset):
    for i in range(num_of_labels):
        if dataset['label'] == labels[i]:
            dataset['label'] = i
    return dataset

if dataset_name == 'citation_intent':
    dataset = dataset.map(encode_labels, remove_columns=["metadata"])
else:
    dataset = dataset.map(encode_labels)
dataset['train'][0]

Loading cached processed dataset at C:\Users\The Doctor\.cache\huggingface\datasets\json\zapsdcn--citation_intent-0b0f6658161cc990\0.0.0\ac0ca5f5289a6cf108e706efcf040422dbbfa8e658dee6a819f20d76bb84d26b\cache-f61d529f5717567d.arrow
Loading cached processed dataset at C:\Users\The Doctor\.cache\huggingface\datasets\json\zapsdcn--citation_intent-0b0f6658161cc990\0.0.0\ac0ca5f5289a6cf108e706efcf040422dbbfa8e658dee6a819f20d76bb84d26b\cache-f4eb112f2ae43469.arrow
Loading cached processed dataset at C:\Users\The Doctor\.cache\huggingface\datasets\json\zapsdcn--citation_intent-0b0f6658161cc990\0.0.0\ac0ca5f5289a6cf108e706efcf040422dbbfa8e658dee6a819f20d76bb84d26b\cache-3aba918a6e420a41.arrow


{'text': 'Thus , over the past few years , along with advances in the use of learning and statistical methods for acquisition of full parsers ( Collins , 1997 ; Charniak , 1997a ; Charniak , 1997b ; Ratnaparkhi , 1997 ) , significant progress has been made on the use of statistical learning methods to recognize shallow parsing patterns syntactic phrases or words that participate in a syntactic relationship ( Church , 1988 ; Ramshaw and Marcus , 1995 ; Argamon et al. , 1998 ; Cardie and Pierce , 1998 ; Munoz et al. , 1999 ; Punyakanok and Roth , 2001 ; Buchholz et al. , 1999 ; Tjong Kim Sang and Buchholz , 2000 ) .',
 'label': 0}

In [7]:
def encode_batch_finetuning(batch):
  """Encodes a batch of input data using the model tokenizer."""
  return tokenizer(batch["text"], max_length=128, truncation=True, padding="max_length")

# Encode the input data
dataset_finetuning = dataset.map(encode_batch_finetuning, batched=True)
# The transformers model expects the target class column to be named "labels"
dataset_finetuning = dataset_finetuning.rename_column("label", 'labels')
# Transform to pytorch tensors and only output the required columns
dataset_finetuning.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])

Loading cached processed dataset at C:\Users\The Doctor\.cache\huggingface\datasets\json\zapsdcn--citation_intent-0b0f6658161cc990\0.0.0\ac0ca5f5289a6cf108e706efcf040422dbbfa8e658dee6a819f20d76bb84d26b\cache-36d4fb079abf4e95.arrow
Loading cached processed dataset at C:\Users\The Doctor\.cache\huggingface\datasets\json\zapsdcn--citation_intent-0b0f6658161cc990\0.0.0\ac0ca5f5289a6cf108e706efcf040422dbbfa8e658dee6a819f20d76bb84d26b\cache-d52100e62bd03463.arrow
Loading cached processed dataset at C:\Users\The Doctor\.cache\huggingface\datasets\json\zapsdcn--citation_intent-0b0f6658161cc990\0.0.0\ac0ca5f5289a6cf108e706efcf040422dbbfa8e658dee6a819f20d76bb84d26b\cache-534970400827b934.arrow


# Model Creation

In [8]:
from transformers import RobertaConfig
from transformers import RobertaAdapterModel

def model_init(adapter_name = 'default_adapter', 
               num_lables = 0, 
               pretraining = False,
               load_adapter = False,
               adapter_dir = 'path'):
    """Creates a new roBERTa model with the given name for its adapter.

    Args:
        adapter_name (str): The name of the adapter to load/create. Defaults to 'default_adapter'.
        num_lables (int, optional): The number of labels for classification task. Defaults to 0.
        pretraining (bool, optional): Whether to create a model for pretraining or classification. Defaults to False.
        load_adapter (bool, optional): Whether to load an adapter with the adapter_name given or create a new one. Defaults to False.
        adapter_dir (str, optional): Directory to load the adapter. If load_adapter you need to specify this.  Defaults to 'path'.

    Returns:
        RobertaAdapterModel: A roBERTA model with an adapter added to it.
    """
    
    if pretraining:
        config = RobertaConfig.from_pretrained(
            "roberta-base",
            # num_labels=num_of_labels,*-8536.22.03
        )
        model = RobertaAdapterModel.from_pretrained(
            "roberta-base",
            config=config,
        )
        if load_adapter:
            # Add new adapter
            model.load_adapter(adapter_dir)

        else:
            # Add new adapter
            model.add_adapter(adapter_name)
            
        # Add a matching classification head
        model.add_masked_lm_head(adapter_name)
            
    else:
        config = RobertaConfig.from_pretrained(
            "roberta-base",
            num_labels=num_lables,
        )
        model = RobertaAdapterModel.from_pretrained(
            "roberta-base",
            config=config,
        )
        
        if load_adapter:
            # Add new adapter
            model.load_adapter(adapter_dir)

        else:
            # Add new adapter
            model.add_adapter(adapter_name)
            
        # Add a matching classification head
        model.add_classification_head(
                adapter_name,
                num_labels=num_lables,
                id2label={0:'Background', 1:'CompareOrContrast', 2:'Extends', 
                        3:'Future', 4:'Motivation', 5:'Uses'},
                overwrite_ok = True)
            
    # Activate the adapter
    model.train_adapter(adapter_name)    
     
    return model

Pretraining Block

In [9]:
from transformers import TrainingArguments, AdapterTrainer
from datasets import load_metric
from torch.utils.tensorboard import SummaryWriter
from transformers.integrations import TensorBoardCallback

import json

def pretraining_loop(num_models, training_args, dataset, 
                     data_collator, adapter_name, 
                    #  DAPT_n_TAPT, TAPT_dataset
                     ):
    """The Loop for running num_models number of models to account for run2run variance. Will run the model 
        and evaluate.

    Args:
        num_models (int): Number of models to loop through
        training_args (transformers.TrainingArguments): The arguments to pass to the trainer
        dataset (dataset): The dataset to train on
        data_collator (data_collator): The data collator for the trainer to use
        adapter_name (str): Name of the adapter to create
    """

    for i in range(num_models):
        adapter = f"{adapter_name}_{i}"
        model = model_init(adapter_name = adapter, pretraining=True)
        
        writer = SummaryWriter(log_dir= f'runs/{adapter}')
        writer = TensorBoardCallback(writer)

        trainer = AdapterTrainer(
            model=model,
            args=training_args,
            train_dataset=dataset["train"],
            eval_dataset=dataset["validation"],
            data_collator=data_collator,  
            callbacks=[writer] 
        )
        
        trainer.train()
        
        f = open(f"{training_args.output_dir}/evaulations.txt", "a")
        f.write(adapter)
        f.write(json.dumps(trainer.evaluate(dataset['test'])))
        f.write('\n')
        f.close()
        
        model.save_all_adapters(training_args.output_dir, with_head=False)
        # model.save_pretrained(f"{adapter_name}")
        
        # if DAPT_n_TAPT:
        #     trainer = AdapterTrainer(
        #         model=model,
        #         args=training_args,
        #         train_dataset=TAPT_dataset["train"],
        #         eval_dataset=TAPT_dataset["validation"],
        #         data_collator=data_collator,  
        #         callbacks=[writer] 
        #     )
            
        #     trainer.train()
        
        #     f = open("DAPT_TAPT_evaulations.txt", "a")
        #     f.write(adapter_name)
        #     f.write(trainer.evaluate(TAPT_dataset['test']))
        #     f.write('\n')
        #     f.close()
            
        #     model.save_pretrained(f"{adapter_name}_DAPT_TAPT")

DAPT Training

In [11]:
# training_args = TrainingArguments(
#     learning_rate=5e-4,
#     num_train_epochs=1,
#     per_device_train_batch_size=32,
#     per_device_eval_batch_size=32,
#     logging_steps=100,
#     output_dir="./training_output/pretraining/DAPT",
#     overwrite_output_dir=True,
#     # The next line is important to ensure the dataset labels are properly passed to the model
#     remove_unused_columns=True,
#     evaluation_strategy = 'steps',
#     # load_best_model_at_end = True,
#     save_steps = 100,
#     gradient_accumulation_steps = 64,
#     warmup_ratio = 0.06,
#     weight_decay=0.01,
#     adam_epsilon = 1e-6,
# )

In [12]:
# pretraining_loop(num_models = 5, 
#                  training_args = training_args, 
#                 #  dataset = DAPT_dataset, TODO: Need to add DAPT training set
#                  data_collator = data_collator, 
#                  adapter_name = "DAPT_sci-erc")

DAPT+TAPT Training

TAPT Training

In [13]:
training_args = TrainingArguments(
    learning_rate=0.0001,
    num_train_epochs=100,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    logging_steps=10,
    output_dir="./training_output/pretraining/TAPT",
    overwrite_output_dir=True,
    remove_unused_columns=True,
    evaluation_strategy = 'steps',
    # load_best_model_at_end = True,
    save_steps = 100,
    gradient_accumulation_steps = 9,
    warmup_ratio = 0.06,
    # load_best_model_at_end = True,
    weight_decay=0.01,
    adam_epsilon = 1e-6,
)

In [14]:
pretraining_loop(num_models = 2, 
                 training_args = training_args, 
                 dataset = dataset_pretraining, 
                 data_collator = data_collator, 
                 adapter_name = f"TAPT_{dataset_name}")

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaAdapterModel: ['lm_head.dense.bias', 'lm_head.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.dense.weight']
- This IS expected if you are initializing RobertaAdapterModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaAdapterModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaAdapterModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.embeddings.position_ids']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
You

{'loss': 17.6086, 'learning_rate': 1.5151515151515153e-05, 'epoch': 0.85}


                                                 
  1%|          | 10/1100 [00:21<32:59,  1.82s/it]

{'eval_loss': 17.298175811767578, 'eval_runtime': 0.6836, 'eval_samples_per_second': 166.759, 'eval_steps_per_second': 11.702, 'epoch': 0.85}


  2%|▏         | 20/1100 [00:40<32:19,  1.80s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 18.022, 'learning_rate': 3.0303030303030306e-05, 'epoch': 1.76}


                                                 
  2%|▏         | 20/1100 [00:41<32:19,  1.80s/it]

{'eval_loss': 16.029788970947266, 'eval_runtime': 0.6856, 'eval_samples_per_second': 166.272, 'eval_steps_per_second': 11.668, 'epoch': 1.76}


  3%|▎         | 30/1100 [01:00<32:16,  1.81s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 16.642, 'learning_rate': 4.545454545454546e-05, 'epoch': 2.68}


                                                 
  3%|▎         | 30/1100 [01:00<32:16,  1.81s/it]

{'eval_loss': 14.507796287536621, 'eval_runtime': 0.6866, 'eval_samples_per_second': 166.03, 'eval_steps_per_second': 11.651, 'epoch': 2.68}


  4%|▎         | 40/1100 [01:19<32:16,  1.83s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 15.0501, 'learning_rate': 6.060606060606061e-05, 'epoch': 3.59}


                                                 
  4%|▎         | 40/1100 [01:20<32:16,  1.83s/it]

{'eval_loss': 13.04714298248291, 'eval_runtime': 0.6686, 'eval_samples_per_second': 170.504, 'eval_steps_per_second': 11.965, 'epoch': 3.59}


  5%|▍         | 50/1100 [01:39<32:20,  1.85s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 13.3241, 'learning_rate': 7.575757575757576e-05, 'epoch': 4.51}


                                                 
  5%|▍         | 50/1100 [01:40<32:20,  1.85s/it]

{'eval_loss': 11.394652366638184, 'eval_runtime': 0.6916, 'eval_samples_per_second': 164.829, 'eval_steps_per_second': 11.567, 'epoch': 4.51}


  5%|▌         | 60/1100 [01:59<32:35,  1.88s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 11.6461, 'learning_rate': 9.090909090909092e-05, 'epoch': 5.42}


                                                 
  5%|▌         | 60/1100 [01:59<32:35,  1.88s/it]

{'eval_loss': 9.870506286621094, 'eval_runtime': 0.6976, 'eval_samples_per_second': 163.41, 'eval_steps_per_second': 11.467, 'epoch': 5.42}


  6%|▋         | 70/1100 [02:18<32:11,  1.88s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 10.0186, 'learning_rate': 9.961315280464217e-05, 'epoch': 6.34}


                                                 
  6%|▋         | 70/1100 [02:19<32:11,  1.88s/it]

{'eval_loss': 8.427194595336914, 'eval_runtime': 0.6916, 'eval_samples_per_second': 164.829, 'eval_steps_per_second': 11.567, 'epoch': 6.34}


  7%|▋         | 80/1100 [02:38<33:34,  1.98s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 8.7526, 'learning_rate': 9.864603481624759e-05, 'epoch': 7.25}


                                                 
  7%|▋         | 80/1100 [02:39<33:34,  1.98s/it]

{'eval_loss': 7.620426654815674, 'eval_runtime': 0.6966, 'eval_samples_per_second': 163.645, 'eval_steps_per_second': 11.484, 'epoch': 7.25}


  8%|▊         | 90/1100 [02:58<34:48,  2.07s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 8.0214, 'learning_rate': 9.767891682785301e-05, 'epoch': 8.17}


                                                 
  8%|▊         | 90/1100 [02:58<34:48,  2.07s/it]

{'eval_loss': 7.029154300689697, 'eval_runtime': 0.6966, 'eval_samples_per_second': 163.645, 'eval_steps_per_second': 11.484, 'epoch': 8.17}


  9%|▉         | 100/1100 [03:17<36:22,  2.18s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 7.558, 'learning_rate': 9.671179883945843e-05, 'epoch': 9.08}


                                                  
  9%|▉         | 100/1100 [03:18<36:22,  2.18s/it]Saving model checkpoint to ./training_output/pretraining/TAPT\checkpoint-100
Configuration saved in ./training_output/pretraining/TAPT\checkpoint-100\TAPT_citation_intent_0\adapter_config.json
Module weights saved in ./training_output/pretraining/TAPT\checkpoint-100\TAPT_citation_intent_0\pytorch_adapter.bin
Configuration saved in ./training_output/pretraining/TAPT\checkpoint-100\TAPT_citation_intent_0\head_config.json


{'eval_loss': 6.800355434417725, 'eval_runtime': 0.6946, 'eval_samples_per_second': 164.116, 'eval_steps_per_second': 11.517, 'epoch': 9.08}


Module weights saved in ./training_output/pretraining/TAPT\checkpoint-100\TAPT_citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/pretraining/TAPT\checkpoint-100\TAPT_citation_intent_0\head_config.json
Module weights saved in ./training_output/pretraining/TAPT\checkpoint-100\TAPT_citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/pretraining/TAPT\checkpoint-100\TAPT_citation_intent_0\head_config.json
Module weights saved in ./training_output/pretraining/TAPT\checkpoint-100\TAPT_citation_intent_0\pytorch_model_head.bin
 10%|█         | 110/1100 [03:37<29:57,  1.82s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 6.7775, 'learning_rate': 9.574468085106384e-05, 'epoch': 9.93}


                                                  
 10%|█         | 110/1100 [03:38<29:57,  1.82s/it]

{'eval_loss': 6.446367263793945, 'eval_runtime': 0.6916, 'eval_samples_per_second': 164.829, 'eval_steps_per_second': 11.567, 'epoch': 9.93}


 11%|█         | 120/1100 [03:57<29:34,  1.81s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 7.1047, 'learning_rate': 9.477756286266924e-05, 'epoch': 10.85}


                                                  
 11%|█         | 120/1100 [03:57<29:34,  1.81s/it]

{'eval_loss': 6.330708980560303, 'eval_runtime': 0.6926, 'eval_samples_per_second': 164.59, 'eval_steps_per_second': 11.55, 'epoch': 10.85}


 12%|█▏        | 130/1100 [04:17<29:19,  1.81s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 6.9344, 'learning_rate': 9.381044487427466e-05, 'epoch': 11.76}


                                                  
 12%|█▏        | 130/1100 [04:17<29:19,  1.81s/it]

{'eval_loss': 6.210508823394775, 'eval_runtime': 0.7016, 'eval_samples_per_second': 162.477, 'eval_steps_per_second': 11.402, 'epoch': 11.76}


 13%|█▎        | 140/1100 [04:36<29:14,  1.83s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 6.7691, 'learning_rate': 9.284332688588008e-05, 'epoch': 12.68}


                                                  
 13%|█▎        | 140/1100 [04:37<29:14,  1.83s/it]

{'eval_loss': 6.144393444061279, 'eval_runtime': 0.6946, 'eval_samples_per_second': 164.116, 'eval_steps_per_second': 11.517, 'epoch': 12.68}


 14%|█▎        | 150/1100 [04:56<29:11,  1.84s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 6.7078, 'learning_rate': 9.187620889748549e-05, 'epoch': 13.59}


                                                  
 14%|█▎        | 150/1100 [04:57<29:11,  1.84s/it]

{'eval_loss': 5.914926528930664, 'eval_runtime': 0.6956, 'eval_samples_per_second': 163.88, 'eval_steps_per_second': 11.5, 'epoch': 13.59}


 15%|█▍        | 160/1100 [05:16<29:07,  1.86s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 6.5624, 'learning_rate': 9.090909090909092e-05, 'epoch': 14.51}


                                                  
 15%|█▍        | 160/1100 [05:17<29:07,  1.86s/it]

{'eval_loss': 5.976020336151123, 'eval_runtime': 0.6976, 'eval_samples_per_second': 163.41, 'eval_steps_per_second': 11.467, 'epoch': 14.51}


 15%|█▌        | 170/1100 [05:36<29:17,  1.89s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 6.4461, 'learning_rate': 8.994197292069633e-05, 'epoch': 15.42}


                                                  
 15%|█▌        | 170/1100 [05:37<29:17,  1.89s/it]

{'eval_loss': 5.805785655975342, 'eval_runtime': 0.6946, 'eval_samples_per_second': 164.116, 'eval_steps_per_second': 11.517, 'epoch': 15.42}


 16%|█▋        | 180/1100 [05:56<29:34,  1.93s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 6.4669, 'learning_rate': 8.897485493230174e-05, 'epoch': 16.34}


                                                  
 16%|█▋        | 180/1100 [05:57<29:34,  1.93s/it]

{'eval_loss': 5.7239179611206055, 'eval_runtime': 0.6946, 'eval_samples_per_second': 164.116, 'eval_steps_per_second': 11.517, 'epoch': 16.34}


 17%|█▋        | 190/1100 [06:15<29:24,  1.94s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 6.3658, 'learning_rate': 8.800773694390716e-05, 'epoch': 17.25}


                                                  
 17%|█▋        | 190/1100 [06:16<29:24,  1.94s/it]

{'eval_loss': 5.7908453941345215, 'eval_runtime': 0.6876, 'eval_samples_per_second': 165.788, 'eval_steps_per_second': 11.634, 'epoch': 17.25}


 18%|█▊        | 200/1100 [06:35<31:00,  2.07s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 6.3175, 'learning_rate': 8.704061895551258e-05, 'epoch': 18.17}


                                                  
 18%|█▊        | 200/1100 [06:36<31:00,  2.07s/it]Saving model checkpoint to ./training_output/pretraining/TAPT\checkpoint-200
Configuration saved in ./training_output/pretraining/TAPT\checkpoint-200\TAPT_citation_intent_0\adapter_config.json
Module weights saved in ./training_output/pretraining/TAPT\checkpoint-200\TAPT_citation_intent_0\pytorch_adapter.bin
Configuration saved in ./training_output/pretraining/TAPT\checkpoint-200\TAPT_citation_intent_0\head_config.json


{'eval_loss': 5.747984886169434, 'eval_runtime': 0.7006, 'eval_samples_per_second': 162.709, 'eval_steps_per_second': 11.418, 'epoch': 18.17}


Module weights saved in ./training_output/pretraining/TAPT\checkpoint-200\TAPT_citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/pretraining/TAPT\checkpoint-200\TAPT_citation_intent_0\head_config.json
Module weights saved in ./training_output/pretraining/TAPT\checkpoint-200\TAPT_citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/pretraining/TAPT\checkpoint-200\TAPT_citation_intent_0\head_config.json
Module weights saved in ./training_output/pretraining/TAPT\checkpoint-200\TAPT_citation_intent_0\pytorch_model_head.bin
 19%|█▉        | 210/1100 [06:56<32:39,  2.20s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 6.2542, 'learning_rate': 8.6073500967118e-05, 'epoch': 19.08}


                                                  
 19%|█▉        | 210/1100 [06:57<32:39,  2.20s/it]

{'eval_loss': 5.69124174118042, 'eval_runtime': 0.6946, 'eval_samples_per_second': 164.116, 'eval_steps_per_second': 11.517, 'epoch': 19.08}


 20%|██        | 220/1100 [07:15<26:29,  1.81s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.7599, 'learning_rate': 8.510638297872341e-05, 'epoch': 19.93}


                                                  
 20%|██        | 220/1100 [07:15<26:29,  1.81s/it]

{'eval_loss': 5.867902755737305, 'eval_runtime': 0.6976, 'eval_samples_per_second': 163.41, 'eval_steps_per_second': 11.467, 'epoch': 19.93}


 21%|██        | 230/1100 [07:34<26:15,  1.81s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 6.1799, 'learning_rate': 8.413926499032882e-05, 'epoch': 20.85}


                                                  
 21%|██        | 230/1100 [07:35<26:15,  1.81s/it]

{'eval_loss': 5.768768787384033, 'eval_runtime': 0.6966, 'eval_samples_per_second': 163.645, 'eval_steps_per_second': 11.484, 'epoch': 20.85}


 22%|██▏       | 240/1100 [07:54<26:01,  1.82s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 6.1052, 'learning_rate': 8.317214700193425e-05, 'epoch': 21.76}


                                                  
 22%|██▏       | 240/1100 [07:55<26:01,  1.82s/it]

{'eval_loss': 5.547813415527344, 'eval_runtime': 0.7026, 'eval_samples_per_second': 162.246, 'eval_steps_per_second': 11.386, 'epoch': 21.76}


 23%|██▎       | 250/1100 [08:14<25:52,  1.83s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 6.0731, 'learning_rate': 8.220502901353966e-05, 'epoch': 22.68}


                                                  
 23%|██▎       | 250/1100 [08:15<25:52,  1.83s/it]

{'eval_loss': 5.610992431640625, 'eval_runtime': 0.6966, 'eval_samples_per_second': 163.645, 'eval_steps_per_second': 11.484, 'epoch': 22.68}


 24%|██▎       | 260/1100 [08:34<25:45,  1.84s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 6.0691, 'learning_rate': 8.123791102514507e-05, 'epoch': 23.59}


                                                  
 24%|██▎       | 260/1100 [08:35<25:45,  1.84s/it]

{'eval_loss': 5.649265289306641, 'eval_runtime': 0.6956, 'eval_samples_per_second': 163.88, 'eval_steps_per_second': 11.5, 'epoch': 23.59}


 25%|██▍       | 270/1100 [08:54<25:43,  1.86s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.9732, 'learning_rate': 8.027079303675049e-05, 'epoch': 24.51}


                                                  
 25%|██▍       | 270/1100 [08:55<25:43,  1.86s/it]

{'eval_loss': 5.436666965484619, 'eval_runtime': 0.6946, 'eval_samples_per_second': 164.116, 'eval_steps_per_second': 11.517, 'epoch': 24.51}


 25%|██▌       | 280/1100 [09:14<25:53,  1.89s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.9811, 'learning_rate': 7.930367504835591e-05, 'epoch': 25.42}


                                                  
 25%|██▌       | 280/1100 [09:15<25:53,  1.89s/it]

{'eval_loss': 5.505371570587158, 'eval_runtime': 0.6976, 'eval_samples_per_second': 163.41, 'eval_steps_per_second': 11.467, 'epoch': 25.42}


 26%|██▋       | 290/1100 [09:34<26:04,  1.93s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.8952, 'learning_rate': 7.833655705996133e-05, 'epoch': 26.34}


                                                  
 26%|██▋       | 290/1100 [09:34<26:04,  1.93s/it]

{'eval_loss': 5.4165940284729, 'eval_runtime': 0.7066, 'eval_samples_per_second': 161.327, 'eval_steps_per_second': 11.321, 'epoch': 26.34}


 27%|██▋       | 300/1100 [09:54<26:31,  1.99s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.9077, 'learning_rate': 7.736943907156673e-05, 'epoch': 27.25}


                                                  
 27%|██▋       | 300/1100 [09:54<26:31,  1.99s/it]Saving model checkpoint to ./training_output/pretraining/TAPT\checkpoint-300
Configuration saved in ./training_output/pretraining/TAPT\checkpoint-300\TAPT_citation_intent_0\adapter_config.json
Module weights saved in ./training_output/pretraining/TAPT\checkpoint-300\TAPT_citation_intent_0\pytorch_adapter.bin
Configuration saved in ./training_output/pretraining/TAPT\checkpoint-300\TAPT_citation_intent_0\head_config.json


{'eval_loss': 5.40991735458374, 'eval_runtime': 0.6976, 'eval_samples_per_second': 163.41, 'eval_steps_per_second': 11.467, 'epoch': 27.25}


Module weights saved in ./training_output/pretraining/TAPT\checkpoint-300\TAPT_citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/pretraining/TAPT\checkpoint-300\TAPT_citation_intent_0\head_config.json
Module weights saved in ./training_output/pretraining/TAPT\checkpoint-300\TAPT_citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/pretraining/TAPT\checkpoint-300\TAPT_citation_intent_0\head_config.json
Module weights saved in ./training_output/pretraining/TAPT\checkpoint-300\TAPT_citation_intent_0\pytorch_model_head.bin
 28%|██▊       | 310/1100 [10:14<27:19,  2.08s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.8737, 'learning_rate': 7.640232108317214e-05, 'epoch': 28.17}


                                                  
 28%|██▊       | 310/1100 [10:15<27:19,  2.08s/it]

{'eval_loss': 5.395690441131592, 'eval_runtime': 0.6986, 'eval_samples_per_second': 163.176, 'eval_steps_per_second': 11.451, 'epoch': 28.17}


 29%|██▉       | 320/1100 [10:34<28:25,  2.19s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.8897, 'learning_rate': 7.543520309477757e-05, 'epoch': 29.08}


                                                  
 29%|██▉       | 320/1100 [10:35<28:25,  2.19s/it]

{'eval_loss': 5.560412406921387, 'eval_runtime': 0.6846, 'eval_samples_per_second': 166.516, 'eval_steps_per_second': 11.685, 'epoch': 29.08}


 30%|███       | 330/1100 [10:52<23:09,  1.80s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.424, 'learning_rate': 7.446808510638298e-05, 'epoch': 29.93}


                                                  
 30%|███       | 330/1100 [10:53<23:09,  1.80s/it]

{'eval_loss': 5.3951334953308105, 'eval_runtime': 0.6806, 'eval_samples_per_second': 167.495, 'eval_steps_per_second': 11.754, 'epoch': 29.93}


 31%|███       | 340/1100 [11:12<22:54,  1.81s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.8509, 'learning_rate': 7.350096711798839e-05, 'epoch': 30.85}


                                                  
 31%|███       | 340/1100 [11:13<22:54,  1.81s/it]

{'eval_loss': 5.339297771453857, 'eval_runtime': 0.6916, 'eval_samples_per_second': 164.829, 'eval_steps_per_second': 11.567, 'epoch': 30.85}


 32%|███▏      | 350/1100 [11:32<22:40,  1.81s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.7947, 'learning_rate': 7.25338491295938e-05, 'epoch': 31.76}


                                                  
 32%|███▏      | 350/1100 [11:33<22:40,  1.81s/it]

{'eval_loss': 5.387465000152588, 'eval_runtime': 0.6936, 'eval_samples_per_second': 164.353, 'eval_steps_per_second': 11.534, 'epoch': 31.76}


 33%|███▎      | 360/1100 [11:51<22:01,  1.79s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.7259, 'learning_rate': 7.156673114119923e-05, 'epoch': 32.68}


                                                  
 33%|███▎      | 360/1100 [11:52<22:01,  1.79s/it]

{'eval_loss': 5.397671699523926, 'eval_runtime': 0.6926, 'eval_samples_per_second': 164.591, 'eval_steps_per_second': 11.55, 'epoch': 32.68}


 34%|███▎      | 370/1100 [12:11<22:24,  1.84s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.7447, 'learning_rate': 7.059961315280465e-05, 'epoch': 33.59}


                                                  
 34%|███▎      | 370/1100 [12:12<22:24,  1.84s/it]

{'eval_loss': 5.366695880889893, 'eval_runtime': 0.6956, 'eval_samples_per_second': 163.88, 'eval_steps_per_second': 11.5, 'epoch': 33.59}


 35%|███▍      | 380/1100 [12:31<22:19,  1.86s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.6996, 'learning_rate': 6.963249516441006e-05, 'epoch': 34.51}


                                                  
 35%|███▍      | 380/1100 [12:32<22:19,  1.86s/it]

{'eval_loss': 5.320949077606201, 'eval_runtime': 0.6966, 'eval_samples_per_second': 163.645, 'eval_steps_per_second': 11.484, 'epoch': 34.51}


 35%|███▌      | 390/1100 [12:51<22:21,  1.89s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.7288, 'learning_rate': 6.866537717601547e-05, 'epoch': 35.42}


                                                  
 35%|███▌      | 390/1100 [12:52<22:21,  1.89s/it]

{'eval_loss': 5.253283500671387, 'eval_runtime': 0.6996, 'eval_samples_per_second': 162.942, 'eval_steps_per_second': 11.435, 'epoch': 35.42}


 36%|███▋      | 400/1100 [13:11<22:30,  1.93s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.6656, 'learning_rate': 6.76982591876209e-05, 'epoch': 36.34}


                                                  
 36%|███▋      | 400/1100 [13:11<22:30,  1.93s/it]Saving model checkpoint to ./training_output/pretraining/TAPT\checkpoint-400
Configuration saved in ./training_output/pretraining/TAPT\checkpoint-400\TAPT_citation_intent_0\adapter_config.json
Module weights saved in ./training_output/pretraining/TAPT\checkpoint-400\TAPT_citation_intent_0\pytorch_adapter.bin
Configuration saved in ./training_output/pretraining/TAPT\checkpoint-400\TAPT_citation_intent_0\head_config.json


{'eval_loss': 5.253695487976074, 'eval_runtime': 0.6936, 'eval_samples_per_second': 164.353, 'eval_steps_per_second': 11.534, 'epoch': 36.34}


Module weights saved in ./training_output/pretraining/TAPT\checkpoint-400\TAPT_citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/pretraining/TAPT\checkpoint-400\TAPT_citation_intent_0\head_config.json
Module weights saved in ./training_output/pretraining/TAPT\checkpoint-400\TAPT_citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/pretraining/TAPT\checkpoint-400\TAPT_citation_intent_0\head_config.json
Module weights saved in ./training_output/pretraining/TAPT\checkpoint-400\TAPT_citation_intent_0\pytorch_model_head.bin
 37%|███▋      | 410/1100 [13:31<22:32,  1.96s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.6676, 'learning_rate': 6.673114119922631e-05, 'epoch': 37.25}


                                                  
 37%|███▋      | 410/1100 [13:32<22:32,  1.96s/it]

{'eval_loss': 5.138484477996826, 'eval_runtime': 0.6996, 'eval_samples_per_second': 162.942, 'eval_steps_per_second': 11.435, 'epoch': 37.25}


 38%|███▊      | 420/1100 [13:51<23:27,  2.07s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.6534, 'learning_rate': 6.576402321083172e-05, 'epoch': 38.17}


                                                  
 38%|███▊      | 420/1100 [13:52<23:27,  2.07s/it]

{'eval_loss': 5.188788890838623, 'eval_runtime': 0.6916, 'eval_samples_per_second': 164.829, 'eval_steps_per_second': 11.567, 'epoch': 38.17}


 39%|███▉      | 430/1100 [14:11<24:24,  2.19s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.619, 'learning_rate': 6.479690522243714e-05, 'epoch': 39.08}


                                                  
 39%|███▉      | 430/1100 [14:11<24:24,  2.19s/it]

{'eval_loss': 5.173781394958496, 'eval_runtime': 0.6976, 'eval_samples_per_second': 163.41, 'eval_steps_per_second': 11.467, 'epoch': 39.08}


 40%|████      | 440/1100 [14:29<19:49,  1.80s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.2072, 'learning_rate': 6.382978723404256e-05, 'epoch': 39.93}


                                                  
 40%|████      | 440/1100 [14:30<19:49,  1.80s/it]

{'eval_loss': 5.262930393218994, 'eval_runtime': 0.6926, 'eval_samples_per_second': 164.591, 'eval_steps_per_second': 11.55, 'epoch': 39.93}


 41%|████      | 450/1100 [14:49<19:39,  1.82s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.5712, 'learning_rate': 6.286266924564798e-05, 'epoch': 40.85}


                                                  
 41%|████      | 450/1100 [14:50<19:39,  1.82s/it]

{'eval_loss': 5.197827339172363, 'eval_runtime': 0.7006, 'eval_samples_per_second': 162.709, 'eval_steps_per_second': 11.418, 'epoch': 40.85}


 42%|████▏     | 460/1100 [15:09<19:22,  1.82s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.5706, 'learning_rate': 6.189555125725339e-05, 'epoch': 41.76}


                                                  
 42%|████▏     | 460/1100 [15:10<19:22,  1.82s/it]

{'eval_loss': 5.108599662780762, 'eval_runtime': 0.6936, 'eval_samples_per_second': 164.353, 'eval_steps_per_second': 11.534, 'epoch': 41.76}


 43%|████▎     | 470/1100 [15:29<19:10,  1.83s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.5479, 'learning_rate': 6.092843326885881e-05, 'epoch': 42.68}


                                                  
 43%|████▎     | 470/1100 [15:29<19:10,  1.83s/it]

{'eval_loss': 5.073381423950195, 'eval_runtime': 0.6946, 'eval_samples_per_second': 164.116, 'eval_steps_per_second': 11.517, 'epoch': 42.68}


 44%|████▎     | 480/1100 [15:49<19:03,  1.84s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.546, 'learning_rate': 5.9961315280464216e-05, 'epoch': 43.59}


                                                  
 44%|████▎     | 480/1100 [15:49<19:03,  1.84s/it]

{'eval_loss': 5.060088157653809, 'eval_runtime': 0.6996, 'eval_samples_per_second': 162.942, 'eval_steps_per_second': 11.435, 'epoch': 43.59}


 45%|████▍     | 490/1100 [16:09<18:57,  1.87s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.4665, 'learning_rate': 5.899419729206963e-05, 'epoch': 44.51}


                                                  
 45%|████▍     | 490/1100 [16:09<18:57,  1.87s/it]

{'eval_loss': 5.146588325500488, 'eval_runtime': 0.6936, 'eval_samples_per_second': 164.353, 'eval_steps_per_second': 11.534, 'epoch': 44.51}


 45%|████▌     | 500/1100 [16:28<18:53,  1.89s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.6022, 'learning_rate': 5.802707930367505e-05, 'epoch': 45.42}


                                                  
 45%|████▌     | 500/1100 [16:29<18:53,  1.89s/it]Saving model checkpoint to ./training_output/pretraining/TAPT\checkpoint-500
Configuration saved in ./training_output/pretraining/TAPT\checkpoint-500\TAPT_citation_intent_0\adapter_config.json
Module weights saved in ./training_output/pretraining/TAPT\checkpoint-500\TAPT_citation_intent_0\pytorch_adapter.bin
Configuration saved in ./training_output/pretraining/TAPT\checkpoint-500\TAPT_citation_intent_0\head_config.json


{'eval_loss': 5.110910415649414, 'eval_runtime': 0.6956, 'eval_samples_per_second': 163.88, 'eval_steps_per_second': 11.5, 'epoch': 45.42}


Module weights saved in ./training_output/pretraining/TAPT\checkpoint-500\TAPT_citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/pretraining/TAPT\checkpoint-500\TAPT_citation_intent_0\head_config.json
Module weights saved in ./training_output/pretraining/TAPT\checkpoint-500\TAPT_citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/pretraining/TAPT\checkpoint-500\TAPT_citation_intent_0\head_config.json
Module weights saved in ./training_output/pretraining/TAPT\checkpoint-500\TAPT_citation_intent_0\pytorch_model_head.bin
 46%|████▋     | 510/1100 [16:49<18:36,  1.89s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.5289, 'learning_rate': 5.705996131528046e-05, 'epoch': 46.34}


                                                  
 46%|████▋     | 510/1100 [16:49<18:36,  1.89s/it]

{'eval_loss': 5.087550163269043, 'eval_runtime': 0.6806, 'eval_samples_per_second': 167.495, 'eval_steps_per_second': 11.754, 'epoch': 46.34}


 47%|████▋     | 520/1100 [17:08<18:45,  1.94s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.5113, 'learning_rate': 5.609284332688588e-05, 'epoch': 47.25}


                                                  
 47%|████▋     | 520/1100 [17:09<18:45,  1.94s/it]

{'eval_loss': 5.083967208862305, 'eval_runtime': 0.6776, 'eval_samples_per_second': 168.237, 'eval_steps_per_second': 11.806, 'epoch': 47.25}


 48%|████▊     | 530/1100 [17:28<19:41,  2.07s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.443, 'learning_rate': 5.5125725338491294e-05, 'epoch': 48.17}


                                                  
 48%|████▊     | 530/1100 [17:29<19:41,  2.07s/it]

{'eval_loss': 5.083690643310547, 'eval_runtime': 0.6816, 'eval_samples_per_second': 167.249, 'eval_steps_per_second': 11.737, 'epoch': 48.17}


 49%|████▉     | 540/1100 [17:48<20:28,  2.19s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.4846, 'learning_rate': 5.4158607350096714e-05, 'epoch': 49.08}


                                                  
 49%|████▉     | 540/1100 [17:49<20:28,  2.19s/it]

{'eval_loss': 5.11810302734375, 'eval_runtime': 0.6966, 'eval_samples_per_second': 163.645, 'eval_steps_per_second': 11.484, 'epoch': 49.08}


 50%|█████     | 550/1100 [18:06<16:34,  1.81s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.0861, 'learning_rate': 5.319148936170213e-05, 'epoch': 49.93}


                                                  
 50%|█████     | 550/1100 [18:07<16:34,  1.81s/it]

{'eval_loss': 5.01719856262207, 'eval_runtime': 0.6826, 'eval_samples_per_second': 167.004, 'eval_steps_per_second': 11.72, 'epoch': 49.93}


 51%|█████     | 560/1100 [18:26<16:17,  1.81s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.4411, 'learning_rate': 5.222437137330755e-05, 'epoch': 50.85}


                                                  
 51%|█████     | 560/1100 [18:27<16:17,  1.81s/it]

{'eval_loss': 4.9864678382873535, 'eval_runtime': 0.6806, 'eval_samples_per_second': 167.495, 'eval_steps_per_second': 11.754, 'epoch': 50.85}


 52%|█████▏    | 570/1100 [18:46<15:36,  1.77s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.4046, 'learning_rate': 5.125725338491296e-05, 'epoch': 51.76}


                                                  
 52%|█████▏    | 570/1100 [18:46<15:36,  1.77s/it]

{'eval_loss': 5.0667195320129395, 'eval_runtime': 0.6766, 'eval_samples_per_second': 168.486, 'eval_steps_per_second': 11.824, 'epoch': 51.76}


 53%|█████▎    | 580/1100 [19:05<15:48,  1.82s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.4244, 'learning_rate': 5.029013539651838e-05, 'epoch': 52.68}


                                                  
 53%|█████▎    | 580/1100 [19:06<15:48,  1.82s/it]

{'eval_loss': 5.098927974700928, 'eval_runtime': 0.6826, 'eval_samples_per_second': 167.004, 'eval_steps_per_second': 11.72, 'epoch': 52.68}


 54%|█████▎    | 590/1100 [19:25<15:35,  1.83s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.4453, 'learning_rate': 4.932301740812379e-05, 'epoch': 53.59}


                                                  
 54%|█████▎    | 590/1100 [19:26<15:35,  1.83s/it]

{'eval_loss': 5.061947822570801, 'eval_runtime': 0.6956, 'eval_samples_per_second': 163.88, 'eval_steps_per_second': 11.5, 'epoch': 53.59}


 55%|█████▍    | 600/1100 [19:45<15:31,  1.86s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.3828, 'learning_rate': 4.835589941972921e-05, 'epoch': 54.51}


                                                  
 55%|█████▍    | 600/1100 [19:45<15:31,  1.86s/it]Saving model checkpoint to ./training_output/pretraining/TAPT\checkpoint-600
Configuration saved in ./training_output/pretraining/TAPT\checkpoint-600\TAPT_citation_intent_0\adapter_config.json
Module weights saved in ./training_output/pretraining/TAPT\checkpoint-600\TAPT_citation_intent_0\pytorch_adapter.bin
Configuration saved in ./training_output/pretraining/TAPT\checkpoint-600\TAPT_citation_intent_0\head_config.json


{'eval_loss': 5.070886135101318, 'eval_runtime': 0.6996, 'eval_samples_per_second': 162.942, 'eval_steps_per_second': 11.435, 'epoch': 54.51}


Module weights saved in ./training_output/pretraining/TAPT\checkpoint-600\TAPT_citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/pretraining/TAPT\checkpoint-600\TAPT_citation_intent_0\head_config.json
Module weights saved in ./training_output/pretraining/TAPT\checkpoint-600\TAPT_citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/pretraining/TAPT\checkpoint-600\TAPT_citation_intent_0\head_config.json
Module weights saved in ./training_output/pretraining/TAPT\checkpoint-600\TAPT_citation_intent_0\pytorch_model_head.bin
 55%|█████▌    | 610/1100 [20:05<15:08,  1.85s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.3959, 'learning_rate': 4.738878143133462e-05, 'epoch': 55.42}


                                                  
 55%|█████▌    | 610/1100 [20:06<15:08,  1.85s/it]

{'eval_loss': 4.98222017288208, 'eval_runtime': 0.6756, 'eval_samples_per_second': 168.736, 'eval_steps_per_second': 11.841, 'epoch': 55.42}


 56%|█████▋    | 620/1100 [20:24<15:02,  1.88s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.3631, 'learning_rate': 4.642166344294004e-05, 'epoch': 56.34}


                                                  
 56%|█████▋    | 620/1100 [20:25<15:02,  1.88s/it]

{'eval_loss': 5.0571818351745605, 'eval_runtime': 0.6766, 'eval_samples_per_second': 168.486, 'eval_steps_per_second': 11.824, 'epoch': 56.34}


 57%|█████▋    | 630/1100 [20:44<15:33,  1.99s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.3425, 'learning_rate': 4.545454545454546e-05, 'epoch': 57.25}


                                                  
 57%|█████▋    | 630/1100 [20:45<15:33,  1.99s/it]

{'eval_loss': 5.208597183227539, 'eval_runtime': 0.6816, 'eval_samples_per_second': 167.249, 'eval_steps_per_second': 11.737, 'epoch': 57.25}


 58%|█████▊    | 640/1100 [21:04<15:50,  2.07s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.3675, 'learning_rate': 4.448742746615087e-05, 'epoch': 58.17}


                                                  
 58%|█████▊    | 640/1100 [21:05<15:50,  2.07s/it]

{'eval_loss': 4.9748053550720215, 'eval_runtime': 0.7016, 'eval_samples_per_second': 162.478, 'eval_steps_per_second': 11.402, 'epoch': 58.17}


 59%|█████▉    | 650/1100 [21:24<16:26,  2.19s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.3347, 'learning_rate': 4.352030947775629e-05, 'epoch': 59.08}


                                                  
 59%|█████▉    | 650/1100 [21:24<16:26,  2.19s/it]

{'eval_loss': 4.950099945068359, 'eval_runtime': 0.6806, 'eval_samples_per_second': 167.495, 'eval_steps_per_second': 11.754, 'epoch': 59.08}


 60%|██████    | 660/1100 [21:42<13:03,  1.78s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 4.9564, 'learning_rate': 4.2553191489361704e-05, 'epoch': 59.93}


                                                  
 60%|██████    | 660/1100 [21:43<13:03,  1.78s/it]

{'eval_loss': 4.998556613922119, 'eval_runtime': 0.6956, 'eval_samples_per_second': 163.88, 'eval_steps_per_second': 11.5, 'epoch': 59.93}


 61%|██████    | 670/1100 [22:01<12:39,  1.77s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.2987, 'learning_rate': 4.1586073500967124e-05, 'epoch': 60.85}


                                                  
 61%|██████    | 670/1100 [22:02<12:39,  1.77s/it]

{'eval_loss': 5.007339000701904, 'eval_runtime': 0.6806, 'eval_samples_per_second': 167.495, 'eval_steps_per_second': 11.754, 'epoch': 60.85}


 62%|██████▏   | 680/1100 [22:21<12:38,  1.81s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.3003, 'learning_rate': 4.061895551257254e-05, 'epoch': 61.76}


                                                  
 62%|██████▏   | 680/1100 [22:22<12:38,  1.81s/it]

{'eval_loss': 4.853215217590332, 'eval_runtime': 0.6926, 'eval_samples_per_second': 164.59, 'eval_steps_per_second': 11.55, 'epoch': 61.76}


 63%|██████▎   | 690/1100 [22:41<12:29,  1.83s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.2665, 'learning_rate': 3.965183752417796e-05, 'epoch': 62.68}


                                                  
 63%|██████▎   | 690/1100 [22:41<12:29,  1.83s/it]

{'eval_loss': 4.973938941955566, 'eval_runtime': 0.6756, 'eval_samples_per_second': 168.736, 'eval_steps_per_second': 11.841, 'epoch': 62.68}


 64%|██████▎   | 700/1100 [23:00<11:52,  1.78s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.3172, 'learning_rate': 3.868471953578336e-05, 'epoch': 63.59}


                                                  
 64%|██████▎   | 700/1100 [23:01<11:52,  1.78s/it]Saving model checkpoint to ./training_output/pretraining/TAPT\checkpoint-700
Configuration saved in ./training_output/pretraining/TAPT\checkpoint-700\TAPT_citation_intent_0\adapter_config.json
Module weights saved in ./training_output/pretraining/TAPT\checkpoint-700\TAPT_citation_intent_0\pytorch_adapter.bin
Configuration saved in ./training_output/pretraining/TAPT\checkpoint-700\TAPT_citation_intent_0\head_config.json


{'eval_loss': 4.950514316558838, 'eval_runtime': 0.6746, 'eval_samples_per_second': 168.986, 'eval_steps_per_second': 11.859, 'epoch': 63.59}


Module weights saved in ./training_output/pretraining/TAPT\checkpoint-700\TAPT_citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/pretraining/TAPT\checkpoint-700\TAPT_citation_intent_0\head_config.json
Module weights saved in ./training_output/pretraining/TAPT\checkpoint-700\TAPT_citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/pretraining/TAPT\checkpoint-700\TAPT_citation_intent_0\head_config.json
Module weights saved in ./training_output/pretraining/TAPT\checkpoint-700\TAPT_citation_intent_0\pytorch_model_head.bin
 65%|██████▍   | 710/1100 [23:21<12:09,  1.87s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.3067, 'learning_rate': 3.771760154738878e-05, 'epoch': 64.51}


                                                  
 65%|██████▍   | 710/1100 [23:22<12:09,  1.87s/it]

{'eval_loss': 5.02278470993042, 'eval_runtime': 0.6796, 'eval_samples_per_second': 167.742, 'eval_steps_per_second': 11.771, 'epoch': 64.51}


 65%|██████▌   | 720/1100 [23:41<11:58,  1.89s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.2377, 'learning_rate': 3.6750483558994196e-05, 'epoch': 65.42}


                                                  
 65%|██████▌   | 720/1100 [23:41<11:58,  1.89s/it]

{'eval_loss': 4.952370643615723, 'eval_runtime': 0.6806, 'eval_samples_per_second': 167.495, 'eval_steps_per_second': 11.754, 'epoch': 65.42}


 66%|██████▋   | 730/1100 [24:00<11:44,  1.90s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.2814, 'learning_rate': 3.5783365570599616e-05, 'epoch': 66.34}


                                                  
 66%|██████▋   | 730/1100 [24:01<11:44,  1.90s/it]

{'eval_loss': 4.938517093658447, 'eval_runtime': 0.6786, 'eval_samples_per_second': 167.989, 'eval_steps_per_second': 11.789, 'epoch': 66.34}


 67%|██████▋   | 740/1100 [24:20<11:55,  1.99s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.2532, 'learning_rate': 3.481624758220503e-05, 'epoch': 67.25}


                                                  
 67%|██████▋   | 740/1100 [24:21<11:55,  1.99s/it]

{'eval_loss': 4.899240493774414, 'eval_runtime': 0.6886, 'eval_samples_per_second': 165.547, 'eval_steps_per_second': 11.617, 'epoch': 67.25}


 68%|██████▊   | 750/1100 [24:40<12:03,  2.07s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.227, 'learning_rate': 3.384912959381045e-05, 'epoch': 68.17}


                                                  
 68%|██████▊   | 750/1100 [24:40<12:03,  2.07s/it]

{'eval_loss': 4.904717922210693, 'eval_runtime': 0.6796, 'eval_samples_per_second': 167.742, 'eval_steps_per_second': 11.771, 'epoch': 68.17}


 69%|██████▉   | 760/1100 [24:59<12:23,  2.19s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.2607, 'learning_rate': 3.288201160541586e-05, 'epoch': 69.08}


                                                  
 69%|██████▉   | 760/1100 [25:00<12:23,  2.19s/it]

{'eval_loss': 4.963433742523193, 'eval_runtime': 0.6956, 'eval_samples_per_second': 163.88, 'eval_steps_per_second': 11.5, 'epoch': 69.08}


 70%|███████   | 770/1100 [25:18<09:53,  1.80s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 4.8668, 'learning_rate': 3.191489361702128e-05, 'epoch': 69.93}


                                                  
 70%|███████   | 770/1100 [25:18<09:53,  1.80s/it]

{'eval_loss': 4.954813003540039, 'eval_runtime': 0.6816, 'eval_samples_per_second': 167.249, 'eval_steps_per_second': 11.737, 'epoch': 69.93}


 71%|███████   | 780/1100 [25:37<09:38,  1.81s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.2384, 'learning_rate': 3.0947775628626695e-05, 'epoch': 70.85}


                                                  
 71%|███████   | 780/1100 [25:38<09:38,  1.81s/it]

{'eval_loss': 4.934167385101318, 'eval_runtime': 0.6786, 'eval_samples_per_second': 167.989, 'eval_steps_per_second': 11.789, 'epoch': 70.85}


 72%|███████▏  | 790/1100 [25:57<09:23,  1.82s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.1958, 'learning_rate': 2.9980657640232108e-05, 'epoch': 71.76}


                                                  
 72%|███████▏  | 790/1100 [25:58<09:23,  1.82s/it]

{'eval_loss': 4.875357627868652, 'eval_runtime': 0.6946, 'eval_samples_per_second': 164.116, 'eval_steps_per_second': 11.517, 'epoch': 71.76}


 73%|███████▎  | 800/1100 [26:17<08:57,  1.79s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.2387, 'learning_rate': 2.9013539651837524e-05, 'epoch': 72.68}


                                                  
 73%|███████▎  | 800/1100 [26:17<08:57,  1.79s/it]Saving model checkpoint to ./training_output/pretraining/TAPT\checkpoint-800
Configuration saved in ./training_output/pretraining/TAPT\checkpoint-800\TAPT_citation_intent_0\adapter_config.json
Module weights saved in ./training_output/pretraining/TAPT\checkpoint-800\TAPT_citation_intent_0\pytorch_adapter.bin
Configuration saved in ./training_output/pretraining/TAPT\checkpoint-800\TAPT_citation_intent_0\head_config.json


{'eval_loss': 4.83001184463501, 'eval_runtime': 0.6816, 'eval_samples_per_second': 167.249, 'eval_steps_per_second': 11.737, 'epoch': 72.68}


Module weights saved in ./training_output/pretraining/TAPT\checkpoint-800\TAPT_citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/pretraining/TAPT\checkpoint-800\TAPT_citation_intent_0\head_config.json
Module weights saved in ./training_output/pretraining/TAPT\checkpoint-800\TAPT_citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/pretraining/TAPT\checkpoint-800\TAPT_citation_intent_0\head_config.json
Module weights saved in ./training_output/pretraining/TAPT\checkpoint-800\TAPT_citation_intent_0\pytorch_model_head.bin
 74%|███████▎  | 810/1100 [26:37<08:54,  1.84s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.1846, 'learning_rate': 2.804642166344294e-05, 'epoch': 73.59}


                                                  
 74%|███████▎  | 810/1100 [26:38<08:54,  1.84s/it]

{'eval_loss': 4.940943717956543, 'eval_runtime': 0.6386, 'eval_samples_per_second': 178.521, 'eval_steps_per_second': 12.528, 'epoch': 73.59}


 75%|███████▍  | 820/1100 [26:55<07:34,  1.62s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.1951, 'learning_rate': 2.7079303675048357e-05, 'epoch': 74.51}


                                                  
 75%|███████▍  | 820/1100 [26:55<07:34,  1.62s/it]

{'eval_loss': 4.8856892585754395, 'eval_runtime': 0.6015, 'eval_samples_per_second': 189.512, 'eval_steps_per_second': 13.299, 'epoch': 74.51}


 75%|███████▌  | 830/1100 [27:12<07:23,  1.64s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.1997, 'learning_rate': 2.6112185686653773e-05, 'epoch': 75.42}


                                                  
 75%|███████▌  | 830/1100 [27:12<07:23,  1.64s/it]

{'eval_loss': 4.847349643707275, 'eval_runtime': 0.6025, 'eval_samples_per_second': 189.197, 'eval_steps_per_second': 13.277, 'epoch': 75.42}


 76%|███████▋  | 840/1100 [27:29<07:16,  1.68s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.1408, 'learning_rate': 2.514506769825919e-05, 'epoch': 76.34}


                                                  
 76%|███████▋  | 840/1100 [27:30<07:16,  1.68s/it]

{'eval_loss': 4.886765956878662, 'eval_runtime': 0.6025, 'eval_samples_per_second': 189.197, 'eval_steps_per_second': 13.277, 'epoch': 76.34}


 77%|███████▋  | 850/1100 [27:46<07:12,  1.73s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.1665, 'learning_rate': 2.4177949709864606e-05, 'epoch': 77.25}


                                                  
 77%|███████▋  | 850/1100 [27:47<07:12,  1.73s/it]

{'eval_loss': 4.898924350738525, 'eval_runtime': 0.6005, 'eval_samples_per_second': 189.828, 'eval_steps_per_second': 13.321, 'epoch': 77.25}


 78%|███████▊  | 860/1100 [28:04<07:11,  1.80s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.1709, 'learning_rate': 2.321083172147002e-05, 'epoch': 78.17}


                                                  
 78%|███████▊  | 860/1100 [28:04<07:11,  1.80s/it]

{'eval_loss': 4.8176398277282715, 'eval_runtime': 0.5995, 'eval_samples_per_second': 190.145, 'eval_steps_per_second': 13.343, 'epoch': 78.17}


 79%|███████▉  | 870/1100 [28:21<07:18,  1.91s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.2015, 'learning_rate': 2.2243713733075436e-05, 'epoch': 79.08}


                                                  
 79%|███████▉  | 870/1100 [28:22<07:18,  1.91s/it]

{'eval_loss': 4.769500255584717, 'eval_runtime': 0.6025, 'eval_samples_per_second': 189.197, 'eval_steps_per_second': 13.277, 'epoch': 79.08}


 80%|████████  | 880/1100 [28:37<05:45,  1.57s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 4.8066, 'learning_rate': 2.1276595744680852e-05, 'epoch': 79.93}


                                                  
 80%|████████  | 880/1100 [28:38<05:45,  1.57s/it]

{'eval_loss': 4.994579792022705, 'eval_runtime': 0.6015, 'eval_samples_per_second': 189.512, 'eval_steps_per_second': 13.299, 'epoch': 79.93}


 81%|████████  | 890/1100 [28:54<05:30,  1.57s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.1881, 'learning_rate': 2.030947775628627e-05, 'epoch': 80.85}


                                                  
 81%|████████  | 890/1100 [28:55<05:30,  1.57s/it]

{'eval_loss': 4.826125621795654, 'eval_runtime': 0.6015, 'eval_samples_per_second': 189.512, 'eval_steps_per_second': 13.299, 'epoch': 80.85}


 82%|████████▏ | 900/1100 [29:12<05:16,  1.58s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.176, 'learning_rate': 1.934235976789168e-05, 'epoch': 81.76}


                                                  
 82%|████████▏ | 900/1100 [29:12<05:16,  1.58s/it]Saving model checkpoint to ./training_output/pretraining/TAPT\checkpoint-900
Configuration saved in ./training_output/pretraining/TAPT\checkpoint-900\TAPT_citation_intent_0\adapter_config.json
Module weights saved in ./training_output/pretraining/TAPT\checkpoint-900\TAPT_citation_intent_0\pytorch_adapter.bin
Configuration saved in ./training_output/pretraining/TAPT\checkpoint-900\TAPT_citation_intent_0\head_config.json


{'eval_loss': 4.835247993469238, 'eval_runtime': 0.6035, 'eval_samples_per_second': 188.883, 'eval_steps_per_second': 13.255, 'epoch': 81.76}


Module weights saved in ./training_output/pretraining/TAPT\checkpoint-900\TAPT_citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/pretraining/TAPT\checkpoint-900\TAPT_citation_intent_0\head_config.json
Module weights saved in ./training_output/pretraining/TAPT\checkpoint-900\TAPT_citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/pretraining/TAPT\checkpoint-900\TAPT_citation_intent_0\head_config.json
Module weights saved in ./training_output/pretraining/TAPT\checkpoint-900\TAPT_citation_intent_0\pytorch_model_head.bin
 83%|████████▎ | 910/1100 [29:30<05:04,  1.60s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.1837, 'learning_rate': 1.8375241779497098e-05, 'epoch': 82.68}


                                                  
 83%|████████▎ | 910/1100 [29:30<05:04,  1.60s/it]

{'eval_loss': 4.937137126922607, 'eval_runtime': 0.6035, 'eval_samples_per_second': 188.883, 'eval_steps_per_second': 13.255, 'epoch': 82.68}


 84%|████████▎ | 920/1100 [29:47<04:48,  1.60s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.1254, 'learning_rate': 1.7408123791102515e-05, 'epoch': 83.59}


                                                  
 84%|████████▎ | 920/1100 [29:48<04:48,  1.60s/it]

{'eval_loss': 4.8610429763793945, 'eval_runtime': 0.6035, 'eval_samples_per_second': 188.883, 'eval_steps_per_second': 13.255, 'epoch': 83.59}


 85%|████████▍ | 930/1100 [30:04<04:35,  1.62s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.1745, 'learning_rate': 1.644100580270793e-05, 'epoch': 84.51}


                                                  
 85%|████████▍ | 930/1100 [30:05<04:35,  1.62s/it]

{'eval_loss': 4.825605392456055, 'eval_runtime': 0.6005, 'eval_samples_per_second': 189.828, 'eval_steps_per_second': 13.321, 'epoch': 84.51}


 85%|████████▌ | 940/1100 [30:22<04:23,  1.64s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.1699, 'learning_rate': 1.5473887814313347e-05, 'epoch': 85.42}


                                                  
 85%|████████▌ | 940/1100 [30:22<04:23,  1.64s/it]

{'eval_loss': 4.858495712280273, 'eval_runtime': 0.6005, 'eval_samples_per_second': 189.828, 'eval_steps_per_second': 13.321, 'epoch': 85.42}


 86%|████████▋ | 950/1100 [30:39<04:11,  1.68s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.126, 'learning_rate': 1.4506769825918762e-05, 'epoch': 86.34}


                                                  
 86%|████████▋ | 950/1100 [30:40<04:11,  1.68s/it]

{'eval_loss': 4.811083793640137, 'eval_runtime': 0.6015, 'eval_samples_per_second': 189.512, 'eval_steps_per_second': 13.299, 'epoch': 86.34}


 87%|████████▋ | 960/1100 [30:56<04:02,  1.73s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.1808, 'learning_rate': 1.3539651837524179e-05, 'epoch': 87.25}


                                                  
 87%|████████▋ | 960/1100 [30:57<04:02,  1.73s/it]

{'eval_loss': 4.893993377685547, 'eval_runtime': 0.6005, 'eval_samples_per_second': 189.828, 'eval_steps_per_second': 13.321, 'epoch': 87.25}


 88%|████████▊ | 970/1100 [31:14<03:53,  1.80s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.1317, 'learning_rate': 1.2572533849129595e-05, 'epoch': 88.17}


                                                  
 88%|████████▊ | 970/1100 [31:14<03:53,  1.80s/it]

{'eval_loss': 4.904691696166992, 'eval_runtime': 0.5995, 'eval_samples_per_second': 190.145, 'eval_steps_per_second': 13.343, 'epoch': 88.17}


 89%|████████▉ | 980/1100 [31:31<03:48,  1.91s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.1673, 'learning_rate': 1.160541586073501e-05, 'epoch': 89.08}


                                                  
 89%|████████▉ | 980/1100 [31:31<03:48,  1.91s/it]

{'eval_loss': 4.788512229919434, 'eval_runtime': 0.6035, 'eval_samples_per_second': 188.883, 'eval_steps_per_second': 13.255, 'epoch': 89.08}


 90%|█████████ | 990/1100 [31:47<02:52,  1.57s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 4.7833, 'learning_rate': 1.0638297872340426e-05, 'epoch': 89.93}


                                                  
 90%|█████████ | 990/1100 [31:48<02:52,  1.57s/it]

{'eval_loss': 4.799863815307617, 'eval_runtime': 0.6015, 'eval_samples_per_second': 189.512, 'eval_steps_per_second': 13.299, 'epoch': 89.93}


 91%|█████████ | 1000/1100 [32:04<02:37,  1.57s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.1515, 'learning_rate': 9.67117988394584e-06, 'epoch': 90.85}


                                                   
 91%|█████████ | 1000/1100 [32:05<02:37,  1.57s/it]Saving model checkpoint to ./training_output/pretraining/TAPT\checkpoint-1000
Configuration saved in ./training_output/pretraining/TAPT\checkpoint-1000\TAPT_citation_intent_0\adapter_config.json
Module weights saved in ./training_output/pretraining/TAPT\checkpoint-1000\TAPT_citation_intent_0\pytorch_adapter.bin
Configuration saved in ./training_output/pretraining/TAPT\checkpoint-1000\TAPT_citation_intent_0\head_config.json


{'eval_loss': 4.931614875793457, 'eval_runtime': 0.6015, 'eval_samples_per_second': 189.512, 'eval_steps_per_second': 13.299, 'epoch': 90.85}


Module weights saved in ./training_output/pretraining/TAPT\checkpoint-1000\TAPT_citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/pretraining/TAPT\checkpoint-1000\TAPT_citation_intent_0\head_config.json
Module weights saved in ./training_output/pretraining/TAPT\checkpoint-1000\TAPT_citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/pretraining/TAPT\checkpoint-1000\TAPT_citation_intent_0\head_config.json
Module weights saved in ./training_output/pretraining/TAPT\checkpoint-1000\TAPT_citation_intent_0\pytorch_model_head.bin
 92%|█████████▏| 1010/1100 [32:22<02:23,  1.59s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.1582, 'learning_rate': 8.704061895551257e-06, 'epoch': 91.76}


                                                   
 92%|█████████▏| 1010/1100 [32:23<02:23,  1.59s/it]

{'eval_loss': 4.890052795410156, 'eval_runtime': 0.6015, 'eval_samples_per_second': 189.512, 'eval_steps_per_second': 13.299, 'epoch': 91.76}


 93%|█████████▎| 1020/1100 [32:40<02:07,  1.59s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.1504, 'learning_rate': 7.736943907156674e-06, 'epoch': 92.68}


                                                   
 93%|█████████▎| 1020/1100 [32:40<02:07,  1.59s/it]

{'eval_loss': 4.843925476074219, 'eval_runtime': 0.6025, 'eval_samples_per_second': 189.197, 'eval_steps_per_second': 13.277, 'epoch': 92.68}


 94%|█████████▎| 1030/1100 [32:57<01:52,  1.60s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.1015, 'learning_rate': 6.769825918762089e-06, 'epoch': 93.59}


                                                   
 94%|█████████▎| 1030/1100 [32:58<01:52,  1.60s/it]

{'eval_loss': 4.744523048400879, 'eval_runtime': 0.6015, 'eval_samples_per_second': 189.512, 'eval_steps_per_second': 13.299, 'epoch': 93.59}


 95%|█████████▍| 1040/1100 [33:14<01:36,  1.62s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.1566, 'learning_rate': 5.802707930367505e-06, 'epoch': 94.51}


                                                   
 95%|█████████▍| 1040/1100 [33:15<01:36,  1.62s/it]

{'eval_loss': 4.8261494636535645, 'eval_runtime': 0.6035, 'eval_samples_per_second': 188.883, 'eval_steps_per_second': 13.255, 'epoch': 94.51}


 95%|█████████▌| 1050/1100 [33:32<01:22,  1.64s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.1425, 'learning_rate': 4.83558994197292e-06, 'epoch': 95.42}


                                                   
 95%|█████████▌| 1050/1100 [33:32<01:22,  1.64s/it]

{'eval_loss': 4.839756488800049, 'eval_runtime': 0.5995, 'eval_samples_per_second': 190.145, 'eval_steps_per_second': 13.343, 'epoch': 95.42}


 96%|█████████▋| 1060/1100 [33:49<01:07,  1.68s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.0677, 'learning_rate': 3.868471953578337e-06, 'epoch': 96.34}


                                                   
 96%|█████████▋| 1060/1100 [33:49<01:07,  1.68s/it]

{'eval_loss': 4.708547592163086, 'eval_runtime': 0.6015, 'eval_samples_per_second': 189.512, 'eval_steps_per_second': 13.299, 'epoch': 96.34}


 97%|█████████▋| 1070/1100 [34:06<00:51,  1.73s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.1677, 'learning_rate': 2.9013539651837524e-06, 'epoch': 97.25}


                                                   
 97%|█████████▋| 1070/1100 [34:07<00:51,  1.73s/it]

{'eval_loss': 4.9109272956848145, 'eval_runtime': 0.6005, 'eval_samples_per_second': 189.828, 'eval_steps_per_second': 13.321, 'epoch': 97.25}


 98%|█████████▊| 1080/1100 [34:23<00:35,  1.80s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.119, 'learning_rate': 1.9342359767891684e-06, 'epoch': 98.17}


                                                   
 98%|█████████▊| 1080/1100 [34:24<00:35,  1.80s/it]

{'eval_loss': 4.905938625335693, 'eval_runtime': 0.6005, 'eval_samples_per_second': 189.827, 'eval_steps_per_second': 13.321, 'epoch': 98.17}


 99%|█████████▉| 1090/1100 [34:41<00:19,  1.90s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.1183, 'learning_rate': 9.671179883945842e-07, 'epoch': 99.08}


                                                   
 99%|█████████▉| 1090/1100 [34:41<00:19,  1.90s/it]

{'eval_loss': 4.821847915649414, 'eval_runtime': 0.5995, 'eval_samples_per_second': 190.145, 'eval_steps_per_second': 13.343, 'epoch': 99.08}


100%|██████████| 1100/1100 [34:57<00:00,  1.57s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 4.7834, 'learning_rate': 0.0, 'epoch': 99.93}


                                                   
100%|██████████| 1100/1100 [34:57<00:00,  1.57s/it]Saving model checkpoint to ./training_output/pretraining/TAPT\checkpoint-1100
Configuration saved in ./training_output/pretraining/TAPT\checkpoint-1100\TAPT_citation_intent_0\adapter_config.json
Module weights saved in ./training_output/pretraining/TAPT\checkpoint-1100\TAPT_citation_intent_0\pytorch_adapter.bin
Configuration saved in ./training_output/pretraining/TAPT\checkpoint-1100\TAPT_citation_intent_0\head_config.json


{'eval_loss': 4.7869038581848145, 'eval_runtime': 0.6015, 'eval_samples_per_second': 189.512, 'eval_steps_per_second': 13.299, 'epoch': 99.93}


Module weights saved in ./training_output/pretraining/TAPT\checkpoint-1100\TAPT_citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/pretraining/TAPT\checkpoint-1100\TAPT_citation_intent_0\head_config.json
Module weights saved in ./training_output/pretraining/TAPT\checkpoint-1100\TAPT_citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/pretraining/TAPT\checkpoint-1100\TAPT_citation_intent_0\head_config.json
Module weights saved in ./training_output/pretraining/TAPT\checkpoint-1100\TAPT_citation_intent_0\pytorch_model_head.bin


Training completed. Do not forget to share your model on huggingface.co/models =)


100%|██████████| 1100/1100 [34:58<00:00,  1.91s/it]
***** Running Evaluation *****
  Num examples = 139
  Batch size = 16


{'train_runtime': 2098.8593, 'train_samples_per_second': 80.425, 'train_steps_per_second': 0.524, 'train_loss': 6.172132488597523, 'epoch': 99.93}


100%|██████████| 9/9 [00:00<00:00, 12.58it/s]
Configuration saved in ./training_output/pretraining/TAPT\TAPT_citation_intent_0\adapter_config.json
Module weights saved in ./training_output/pretraining/TAPT\TAPT_citation_intent_0\pytorch_adapter.bin
Configuration saved in ./training_output/pretraining/TAPT\TAPT_citation_intent_0\head_config.json
Module weights saved in ./training_output/pretraining/TAPT\TAPT_citation_intent_0\pytorch_model_head.bin
loading configuration file https://huggingface.co/roberta-base/resolve/main/config.json from cache at C:\Users\The Doctor/.cache\huggingface\transformers\733bade19e5f0ce98e6531021dd5180994bb2f7b8bd7e80c7968805834ba351e.35205c6cfc956461d8515139f0f8dd5d207a2f336c0c3a83b4bc8dca3518e37b
Model config RobertaConfig {
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 76

{'loss': 17.6368, 'learning_rate': 1.5151515151515153e-05, 'epoch': 0.85}


                                                 
  1%|          | 10/1100 [00:16<28:15,  1.56s/it]

{'eval_loss': 17.365793228149414, 'eval_runtime': 0.6045, 'eval_samples_per_second': 188.571, 'eval_steps_per_second': 13.233, 'epoch': 0.85}


  2%|▏         | 20/1100 [00:32<28:23,  1.58s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 18.3481, 'learning_rate': 3.0303030303030306e-05, 'epoch': 1.76}


                                                 
  2%|▏         | 20/1100 [00:33<28:23,  1.58s/it]

{'eval_loss': 16.42099952697754, 'eval_runtime': 0.6015, 'eval_samples_per_second': 189.512, 'eval_steps_per_second': 13.299, 'epoch': 1.76}


  3%|▎         | 30/1100 [00:50<28:22,  1.59s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 17.2091, 'learning_rate': 4.545454545454546e-05, 'epoch': 2.68}


                                                 
  3%|▎         | 30/1100 [00:50<28:22,  1.59s/it]

{'eval_loss': 15.038393020629883, 'eval_runtime': 0.6005, 'eval_samples_per_second': 189.828, 'eval_steps_per_second': 13.321, 'epoch': 2.68}


  4%|▎         | 40/1100 [01:07<28:13,  1.60s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 15.5795, 'learning_rate': 6.060606060606061e-05, 'epoch': 3.59}


                                                 
  4%|▎         | 40/1100 [01:07<28:13,  1.60s/it]

{'eval_loss': 13.449366569519043, 'eval_runtime': 0.6015, 'eval_samples_per_second': 189.512, 'eval_steps_per_second': 13.299, 'epoch': 3.59}


  5%|▍         | 50/1100 [01:24<28:21,  1.62s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 13.6748, 'learning_rate': 7.575757575757576e-05, 'epoch': 4.51}


                                                 
  5%|▍         | 50/1100 [01:25<28:21,  1.62s/it]

{'eval_loss': 11.607220649719238, 'eval_runtime': 0.6005, 'eval_samples_per_second': 189.828, 'eval_steps_per_second': 13.321, 'epoch': 4.51}


  5%|▌         | 60/1100 [01:41<28:28,  1.64s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 11.7735, 'learning_rate': 9.090909090909092e-05, 'epoch': 5.42}


                                                 
  5%|▌         | 60/1100 [01:42<28:28,  1.64s/it]

{'eval_loss': 9.927935600280762, 'eval_runtime': 0.6025, 'eval_samples_per_second': 189.197, 'eval_steps_per_second': 13.277, 'epoch': 5.42}


  6%|▋         | 70/1100 [01:59<28:49,  1.68s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 10.0747, 'learning_rate': 9.961315280464217e-05, 'epoch': 6.34}


                                                 
  6%|▋         | 70/1100 [01:59<28:49,  1.68s/it]

{'eval_loss': 8.498238563537598, 'eval_runtime': 0.6015, 'eval_samples_per_second': 189.512, 'eval_steps_per_second': 13.299, 'epoch': 6.34}


  7%|▋         | 80/1100 [02:16<29:23,  1.73s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 8.8366, 'learning_rate': 9.864603481624759e-05, 'epoch': 7.25}


                                                 
  7%|▋         | 80/1100 [02:17<29:23,  1.73s/it]

{'eval_loss': 7.744973659515381, 'eval_runtime': 0.6015, 'eval_samples_per_second': 189.512, 'eval_steps_per_second': 13.299, 'epoch': 7.25}


  8%|▊         | 90/1100 [02:33<30:19,  1.80s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 8.1413, 'learning_rate': 9.767891682785301e-05, 'epoch': 8.17}


                                                 
  8%|▊         | 90/1100 [02:34<30:19,  1.80s/it]

{'eval_loss': 7.166064262390137, 'eval_runtime': 0.6005, 'eval_samples_per_second': 189.828, 'eval_steps_per_second': 13.321, 'epoch': 8.17}


  9%|▉         | 100/1100 [02:51<31:46,  1.91s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 7.6835, 'learning_rate': 9.671179883945843e-05, 'epoch': 9.08}


                                                  
  9%|▉         | 100/1100 [02:51<31:46,  1.91s/it]Saving model checkpoint to ./training_output/pretraining/TAPT\checkpoint-100
Configuration saved in ./training_output/pretraining/TAPT\checkpoint-100\TAPT_citation_intent_1\adapter_config.json
Module weights saved in ./training_output/pretraining/TAPT\checkpoint-100\TAPT_citation_intent_1\pytorch_adapter.bin
Configuration saved in ./training_output/pretraining/TAPT\checkpoint-100\TAPT_citation_intent_1\head_config.json


{'eval_loss': 6.914406776428223, 'eval_runtime': 0.6005, 'eval_samples_per_second': 189.828, 'eval_steps_per_second': 13.321, 'epoch': 9.08}


Module weights saved in ./training_output/pretraining/TAPT\checkpoint-100\TAPT_citation_intent_1\pytorch_model_head.bin
Configuration saved in ./training_output/pretraining/TAPT\checkpoint-100\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/pretraining/TAPT\checkpoint-100\TAPT_citation_intent_1\pytorch_model_head.bin
Configuration saved in ./training_output/pretraining/TAPT\checkpoint-100\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/pretraining/TAPT\checkpoint-100\TAPT_citation_intent_1\pytorch_model_head.bin
 10%|█         | 110/1100 [03:08<26:05,  1.58s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 6.8973, 'learning_rate': 9.574468085106384e-05, 'epoch': 9.93}


                                                  
 10%|█         | 110/1100 [03:08<26:05,  1.58s/it]

{'eval_loss': 6.60264778137207, 'eval_runtime': 0.6025, 'eval_samples_per_second': 189.197, 'eval_steps_per_second': 13.277, 'epoch': 9.93}


 11%|█         | 120/1100 [03:25<25:43,  1.57s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 7.2278, 'learning_rate': 9.477756286266924e-05, 'epoch': 10.85}


                                                  
 11%|█         | 120/1100 [03:25<25:43,  1.57s/it]

{'eval_loss': 6.481110572814941, 'eval_runtime': 0.5995, 'eval_samples_per_second': 190.144, 'eval_steps_per_second': 13.343, 'epoch': 10.85}


 12%|█▏        | 130/1100 [03:42<25:33,  1.58s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 7.0389, 'learning_rate': 9.381044487427466e-05, 'epoch': 11.76}


                                                  
 12%|█▏        | 130/1100 [03:43<25:33,  1.58s/it]

{'eval_loss': 6.292665481567383, 'eval_runtime': 0.6025, 'eval_samples_per_second': 189.197, 'eval_steps_per_second': 13.277, 'epoch': 11.76}


 13%|█▎        | 140/1100 [03:59<25:24,  1.59s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 6.8846, 'learning_rate': 9.284332688588008e-05, 'epoch': 12.68}


                                                  
 13%|█▎        | 140/1100 [04:00<25:24,  1.59s/it]

{'eval_loss': 6.247900485992432, 'eval_runtime': 0.5995, 'eval_samples_per_second': 190.145, 'eval_steps_per_second': 13.343, 'epoch': 12.68}


 14%|█▎        | 150/1100 [04:17<25:24,  1.60s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 6.8146, 'learning_rate': 9.187620889748549e-05, 'epoch': 13.59}


                                                  
 14%|█▎        | 150/1100 [04:17<25:24,  1.60s/it]

{'eval_loss': 6.032499313354492, 'eval_runtime': 0.6035, 'eval_samples_per_second': 188.883, 'eval_steps_per_second': 13.255, 'epoch': 13.59}


 15%|█▍        | 160/1100 [04:34<25:19,  1.62s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 6.6635, 'learning_rate': 9.090909090909092e-05, 'epoch': 14.51}


                                                  
 15%|█▍        | 160/1100 [04:35<25:19,  1.62s/it]

{'eval_loss': 6.077471733093262, 'eval_runtime': 0.6015, 'eval_samples_per_second': 189.512, 'eval_steps_per_second': 13.299, 'epoch': 14.51}


 15%|█▌        | 170/1100 [04:51<25:28,  1.64s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 6.5495, 'learning_rate': 8.994197292069633e-05, 'epoch': 15.42}


                                                  
 15%|█▌        | 170/1100 [04:52<25:28,  1.64s/it]

{'eval_loss': 5.931332111358643, 'eval_runtime': 0.6015, 'eval_samples_per_second': 189.512, 'eval_steps_per_second': 13.299, 'epoch': 15.42}


 16%|█▋        | 180/1100 [05:08<25:42,  1.68s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 6.5673, 'learning_rate': 8.897485493230174e-05, 'epoch': 16.34}


                                                  
 16%|█▋        | 180/1100 [05:09<25:42,  1.68s/it]

{'eval_loss': 5.813364028930664, 'eval_runtime': 0.6005, 'eval_samples_per_second': 189.828, 'eval_steps_per_second': 13.321, 'epoch': 16.34}


 17%|█▋        | 190/1100 [05:26<26:15,  1.73s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 6.4639, 'learning_rate': 8.800773694390716e-05, 'epoch': 17.25}


                                                  
 17%|█▋        | 190/1100 [05:26<26:15,  1.73s/it]

{'eval_loss': 5.867549419403076, 'eval_runtime': 0.5995, 'eval_samples_per_second': 190.145, 'eval_steps_per_second': 13.343, 'epoch': 17.25}


 18%|█▊        | 200/1100 [05:43<26:58,  1.80s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 6.4012, 'learning_rate': 8.704061895551258e-05, 'epoch': 18.17}


                                                  
 18%|█▊        | 200/1100 [05:44<26:58,  1.80s/it]Saving model checkpoint to ./training_output/pretraining/TAPT\checkpoint-200
Configuration saved in ./training_output/pretraining/TAPT\checkpoint-200\TAPT_citation_intent_1\adapter_config.json
Module weights saved in ./training_output/pretraining/TAPT\checkpoint-200\TAPT_citation_intent_1\pytorch_adapter.bin
Configuration saved in ./training_output/pretraining/TAPT\checkpoint-200\TAPT_citation_intent_1\head_config.json


{'eval_loss': 5.841189384460449, 'eval_runtime': 0.6015, 'eval_samples_per_second': 189.512, 'eval_steps_per_second': 13.299, 'epoch': 18.17}


Module weights saved in ./training_output/pretraining/TAPT\checkpoint-200\TAPT_citation_intent_1\pytorch_model_head.bin
Configuration saved in ./training_output/pretraining/TAPT\checkpoint-200\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/pretraining/TAPT\checkpoint-200\TAPT_citation_intent_1\pytorch_model_head.bin
Configuration saved in ./training_output/pretraining/TAPT\checkpoint-200\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/pretraining/TAPT\checkpoint-200\TAPT_citation_intent_1\pytorch_model_head.bin
 19%|█▉        | 210/1100 [06:01<28:22,  1.91s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 6.3586, 'learning_rate': 8.6073500967118e-05, 'epoch': 19.08}


                                                  
 19%|█▉        | 210/1100 [06:02<28:22,  1.91s/it]

{'eval_loss': 5.817245960235596, 'eval_runtime': 0.6015, 'eval_samples_per_second': 189.512, 'eval_steps_per_second': 13.299, 'epoch': 19.08}


 20%|██        | 220/1100 [06:17<23:02,  1.57s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.8463, 'learning_rate': 8.510638297872341e-05, 'epoch': 19.93}


                                                  
 20%|██        | 220/1100 [06:18<23:02,  1.57s/it]

{'eval_loss': 5.957585334777832, 'eval_runtime': 0.5985, 'eval_samples_per_second': 190.463, 'eval_steps_per_second': 13.366, 'epoch': 19.93}


 21%|██        | 230/1100 [06:35<22:50,  1.58s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 6.2756, 'learning_rate': 8.413926499032882e-05, 'epoch': 20.85}


                                                  
 21%|██        | 230/1100 [06:35<22:50,  1.58s/it]

{'eval_loss': 5.834672927856445, 'eval_runtime': 0.6005, 'eval_samples_per_second': 189.828, 'eval_steps_per_second': 13.321, 'epoch': 20.85}


 22%|██▏       | 240/1100 [06:52<22:37,  1.58s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 6.1972, 'learning_rate': 8.317214700193425e-05, 'epoch': 21.76}


                                                  
 22%|██▏       | 240/1100 [06:52<22:37,  1.58s/it]

{'eval_loss': 5.631331443786621, 'eval_runtime': 0.5995, 'eval_samples_per_second': 190.145, 'eval_steps_per_second': 13.343, 'epoch': 21.76}


 23%|██▎       | 250/1100 [07:09<22:30,  1.59s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 6.1669, 'learning_rate': 8.220502901353966e-05, 'epoch': 22.68}


                                                  
 23%|██▎       | 250/1100 [07:10<22:30,  1.59s/it]

{'eval_loss': 5.691175937652588, 'eval_runtime': 0.6005, 'eval_samples_per_second': 189.828, 'eval_steps_per_second': 13.321, 'epoch': 22.68}


 24%|██▎       | 260/1100 [07:26<22:24,  1.60s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 6.1661, 'learning_rate': 8.123791102514507e-05, 'epoch': 23.59}


                                                  
 24%|██▎       | 260/1100 [07:27<22:24,  1.60s/it]

{'eval_loss': 5.7235918045043945, 'eval_runtime': 0.6005, 'eval_samples_per_second': 189.828, 'eval_steps_per_second': 13.321, 'epoch': 23.59}


 25%|██▍       | 270/1100 [07:44<22:24,  1.62s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 6.0629, 'learning_rate': 8.027079303675049e-05, 'epoch': 24.51}


                                                  
 25%|██▍       | 270/1100 [07:44<22:24,  1.62s/it]

{'eval_loss': 5.511183738708496, 'eval_runtime': 0.6005, 'eval_samples_per_second': 189.828, 'eval_steps_per_second': 13.321, 'epoch': 24.51}


 25%|██▌       | 280/1100 [08:01<22:26,  1.64s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 6.0706, 'learning_rate': 7.930367504835591e-05, 'epoch': 25.42}


                                                  
 25%|██▌       | 280/1100 [08:02<22:26,  1.64s/it]

{'eval_loss': 5.572716236114502, 'eval_runtime': 0.6025, 'eval_samples_per_second': 189.197, 'eval_steps_per_second': 13.277, 'epoch': 25.42}


 26%|██▋       | 290/1100 [08:18<22:40,  1.68s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.9911, 'learning_rate': 7.833655705996133e-05, 'epoch': 26.34}


                                                  
 26%|██▋       | 290/1100 [08:19<22:40,  1.68s/it]

{'eval_loss': 5.480025768280029, 'eval_runtime': 0.6005, 'eval_samples_per_second': 189.828, 'eval_steps_per_second': 13.321, 'epoch': 26.34}


 27%|██▋       | 300/1100 [08:35<23:01,  1.73s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.9848, 'learning_rate': 7.736943907156673e-05, 'epoch': 27.25}


                                                  
 27%|██▋       | 300/1100 [08:36<23:01,  1.73s/it]Saving model checkpoint to ./training_output/pretraining/TAPT\checkpoint-300
Configuration saved in ./training_output/pretraining/TAPT\checkpoint-300\TAPT_citation_intent_1\adapter_config.json
Module weights saved in ./training_output/pretraining/TAPT\checkpoint-300\TAPT_citation_intent_1\pytorch_adapter.bin
Configuration saved in ./training_output/pretraining/TAPT\checkpoint-300\TAPT_citation_intent_1\head_config.json


{'eval_loss': 5.509681224822998, 'eval_runtime': 0.6015, 'eval_samples_per_second': 189.512, 'eval_steps_per_second': 13.299, 'epoch': 27.25}


Module weights saved in ./training_output/pretraining/TAPT\checkpoint-300\TAPT_citation_intent_1\pytorch_model_head.bin
Configuration saved in ./training_output/pretraining/TAPT\checkpoint-300\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/pretraining/TAPT\checkpoint-300\TAPT_citation_intent_1\pytorch_model_head.bin
Configuration saved in ./training_output/pretraining/TAPT\checkpoint-300\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/pretraining/TAPT\checkpoint-300\TAPT_citation_intent_1\pytorch_model_head.bin
 28%|██▊       | 310/1100 [08:54<23:49,  1.81s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.9632, 'learning_rate': 7.640232108317214e-05, 'epoch': 28.17}


                                                  
 28%|██▊       | 310/1100 [08:54<23:49,  1.81s/it]

{'eval_loss': 5.460699558258057, 'eval_runtime': 0.6015, 'eval_samples_per_second': 189.512, 'eval_steps_per_second': 13.299, 'epoch': 28.17}


 29%|██▉       | 320/1100 [09:14<29:18,  2.25s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.9689, 'learning_rate': 7.543520309477757e-05, 'epoch': 29.08}


                                                  
 29%|██▉       | 320/1100 [09:15<29:18,  2.25s/it]

{'eval_loss': 5.605604648590088, 'eval_runtime': 0.6926, 'eval_samples_per_second': 164.591, 'eval_steps_per_second': 11.55, 'epoch': 29.08}


 30%|███       | 330/1100 [09:33<21:52,  1.70s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.5155, 'learning_rate': 7.446808510638298e-05, 'epoch': 29.93}


                                                  
 30%|███       | 330/1100 [09:33<21:52,  1.70s/it]

{'eval_loss': 5.45537805557251, 'eval_runtime': 0.6035, 'eval_samples_per_second': 188.883, 'eval_steps_per_second': 13.255, 'epoch': 29.93}


 31%|███       | 340/1100 [09:50<20:01,  1.58s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.9347, 'learning_rate': 7.350096711798839e-05, 'epoch': 30.85}


                                                  
 31%|███       | 340/1100 [09:50<20:01,  1.58s/it]

{'eval_loss': 5.399775981903076, 'eval_runtime': 0.6035, 'eval_samples_per_second': 188.883, 'eval_steps_per_second': 13.255, 'epoch': 30.85}


 32%|███▏      | 350/1100 [10:07<19:49,  1.59s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.8802, 'learning_rate': 7.25338491295938e-05, 'epoch': 31.76}


                                                  
 32%|███▏      | 350/1100 [10:08<19:49,  1.59s/it]

{'eval_loss': 5.441259860992432, 'eval_runtime': 0.6035, 'eval_samples_per_second': 188.883, 'eval_steps_per_second': 13.255, 'epoch': 31.76}


 33%|███▎      | 360/1100 [10:24<19:40,  1.60s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.8127, 'learning_rate': 7.156673114119923e-05, 'epoch': 32.68}


                                                  
 33%|███▎      | 360/1100 [10:25<19:40,  1.60s/it]

{'eval_loss': 5.4774489402771, 'eval_runtime': 0.6045, 'eval_samples_per_second': 188.571, 'eval_steps_per_second': 13.233, 'epoch': 32.68}


 34%|███▎      | 370/1100 [10:42<19:32,  1.61s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.8379, 'learning_rate': 7.059961315280465e-05, 'epoch': 33.59}


                                                  
 34%|███▎      | 370/1100 [10:42<19:32,  1.61s/it]

{'eval_loss': 5.430126190185547, 'eval_runtime': 0.6035, 'eval_samples_per_second': 188.883, 'eval_steps_per_second': 13.255, 'epoch': 33.59}


 35%|███▍      | 380/1100 [10:59<19:28,  1.62s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.7881, 'learning_rate': 6.963249516441006e-05, 'epoch': 34.51}


                                                  
 35%|███▍      | 380/1100 [11:00<19:28,  1.62s/it]

{'eval_loss': 5.381536483764648, 'eval_runtime': 0.6055, 'eval_samples_per_second': 188.259, 'eval_steps_per_second': 13.211, 'epoch': 34.51}


 35%|███▌      | 390/1100 [11:16<19:31,  1.65s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.8078, 'learning_rate': 6.866537717601547e-05, 'epoch': 35.42}


                                                  
 35%|███▌      | 390/1100 [11:17<19:31,  1.65s/it]

{'eval_loss': 5.346465587615967, 'eval_runtime': 0.6045, 'eval_samples_per_second': 188.571, 'eval_steps_per_second': 13.233, 'epoch': 35.42}


 36%|███▋      | 400/1100 [11:34<19:39,  1.69s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.7488, 'learning_rate': 6.76982591876209e-05, 'epoch': 36.34}


                                                  
 36%|███▋      | 400/1100 [11:34<19:39,  1.69s/it]Saving model checkpoint to ./training_output/pretraining/TAPT\checkpoint-400
Configuration saved in ./training_output/pretraining/TAPT\checkpoint-400\TAPT_citation_intent_1\adapter_config.json
Module weights saved in ./training_output/pretraining/TAPT\checkpoint-400\TAPT_citation_intent_1\pytorch_adapter.bin
Configuration saved in ./training_output/pretraining/TAPT\checkpoint-400\TAPT_citation_intent_1\head_config.json


{'eval_loss': 5.321605682373047, 'eval_runtime': 0.6055, 'eval_samples_per_second': 188.259, 'eval_steps_per_second': 13.211, 'epoch': 36.34}


Module weights saved in ./training_output/pretraining/TAPT\checkpoint-400\TAPT_citation_intent_1\pytorch_model_head.bin
Configuration saved in ./training_output/pretraining/TAPT\checkpoint-400\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/pretraining/TAPT\checkpoint-400\TAPT_citation_intent_1\pytorch_model_head.bin
Configuration saved in ./training_output/pretraining/TAPT\checkpoint-400\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/pretraining/TAPT\checkpoint-400\TAPT_citation_intent_1\pytorch_model_head.bin
 37%|███▋      | 410/1100 [11:52<20:06,  1.75s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.751, 'learning_rate': 6.673114119922631e-05, 'epoch': 37.25}


                                                  
 37%|███▋      | 410/1100 [11:53<20:06,  1.75s/it]

{'eval_loss': 5.210147857666016, 'eval_runtime': 0.6025, 'eval_samples_per_second': 189.197, 'eval_steps_per_second': 13.277, 'epoch': 37.25}


 38%|███▊      | 420/1100 [12:09<20:30,  1.81s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.7315, 'learning_rate': 6.576402321083172e-05, 'epoch': 38.17}


                                                  
 38%|███▊      | 420/1100 [12:10<20:30,  1.81s/it]

{'eval_loss': 5.240194320678711, 'eval_runtime': 0.6025, 'eval_samples_per_second': 189.197, 'eval_steps_per_second': 13.277, 'epoch': 38.17}


 39%|███▉      | 430/1100 [12:27<21:19,  1.91s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.7062, 'learning_rate': 6.479690522243714e-05, 'epoch': 39.08}


                                                  
 39%|███▉      | 430/1100 [12:27<21:19,  1.91s/it]

{'eval_loss': 5.2208662033081055, 'eval_runtime': 0.6035, 'eval_samples_per_second': 188.883, 'eval_steps_per_second': 13.255, 'epoch': 39.08}


 40%|████      | 440/1100 [12:43<17:19,  1.58s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.2814, 'learning_rate': 6.382978723404256e-05, 'epoch': 39.93}


                                                  
 40%|████      | 440/1100 [12:44<17:19,  1.58s/it]

{'eval_loss': 5.336589813232422, 'eval_runtime': 0.6035, 'eval_samples_per_second': 188.883, 'eval_steps_per_second': 13.255, 'epoch': 39.93}


 41%|████      | 450/1100 [13:00<17:07,  1.58s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.6583, 'learning_rate': 6.286266924564798e-05, 'epoch': 40.85}


                                                  
 41%|████      | 450/1100 [13:01<17:07,  1.58s/it]

{'eval_loss': 5.278772354125977, 'eval_runtime': 0.6035, 'eval_samples_per_second': 188.883, 'eval_steps_per_second': 13.255, 'epoch': 40.85}


 42%|████▏     | 460/1100 [13:18<16:55,  1.59s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.6529, 'learning_rate': 6.189555125725339e-05, 'epoch': 41.76}


                                                  
 42%|████▏     | 460/1100 [13:18<16:55,  1.59s/it]

{'eval_loss': 5.1788530349731445, 'eval_runtime': 0.6025, 'eval_samples_per_second': 189.197, 'eval_steps_per_second': 13.277, 'epoch': 41.76}


 43%|████▎     | 470/1100 [13:35<16:44,  1.59s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.6288, 'learning_rate': 6.092843326885881e-05, 'epoch': 42.68}


                                                  
 43%|████▎     | 470/1100 [13:36<16:44,  1.59s/it]

{'eval_loss': 5.1396098136901855, 'eval_runtime': 0.6025, 'eval_samples_per_second': 189.197, 'eval_steps_per_second': 13.277, 'epoch': 42.68}


 44%|████▎     | 480/1100 [13:52<16:36,  1.61s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.6249, 'learning_rate': 5.9961315280464216e-05, 'epoch': 43.59}


                                                  
 44%|████▎     | 480/1100 [13:53<16:36,  1.61s/it]

{'eval_loss': 5.121478080749512, 'eval_runtime': 0.6035, 'eval_samples_per_second': 188.883, 'eval_steps_per_second': 13.255, 'epoch': 43.59}


 45%|████▍     | 490/1100 [14:10<16:29,  1.62s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.5482, 'learning_rate': 5.899419729206963e-05, 'epoch': 44.51}


                                                  
 45%|████▍     | 490/1100 [14:10<16:29,  1.62s/it]

{'eval_loss': 5.222922325134277, 'eval_runtime': 0.6055, 'eval_samples_per_second': 188.259, 'eval_steps_per_second': 13.211, 'epoch': 44.51}


 45%|████▌     | 500/1100 [14:27<16:29,  1.65s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.6806, 'learning_rate': 5.802707930367505e-05, 'epoch': 45.42}


                                                  
 45%|████▌     | 500/1100 [14:28<16:29,  1.65s/it]Saving model checkpoint to ./training_output/pretraining/TAPT\checkpoint-500
Configuration saved in ./training_output/pretraining/TAPT\checkpoint-500\TAPT_citation_intent_1\adapter_config.json
Module weights saved in ./training_output/pretraining/TAPT\checkpoint-500\TAPT_citation_intent_1\pytorch_adapter.bin
Configuration saved in ./training_output/pretraining/TAPT\checkpoint-500\TAPT_citation_intent_1\head_config.json


{'eval_loss': 5.175995349884033, 'eval_runtime': 0.6015, 'eval_samples_per_second': 189.512, 'eval_steps_per_second': 13.299, 'epoch': 45.42}


Module weights saved in ./training_output/pretraining/TAPT\checkpoint-500\TAPT_citation_intent_1\pytorch_model_head.bin
Configuration saved in ./training_output/pretraining/TAPT\checkpoint-500\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/pretraining/TAPT\checkpoint-500\TAPT_citation_intent_1\pytorch_model_head.bin
Configuration saved in ./training_output/pretraining/TAPT\checkpoint-500\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/pretraining/TAPT\checkpoint-500\TAPT_citation_intent_1\pytorch_model_head.bin
 46%|████▋     | 510/1100 [14:45<16:41,  1.70s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.6107, 'learning_rate': 5.705996131528046e-05, 'epoch': 46.34}


                                                  
 46%|████▋     | 510/1100 [14:46<16:41,  1.70s/it]

{'eval_loss': 5.170414447784424, 'eval_runtime': 0.6035, 'eval_samples_per_second': 188.883, 'eval_steps_per_second': 13.255, 'epoch': 46.34}


 47%|████▋     | 520/1100 [15:03<16:46,  1.73s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.5969, 'learning_rate': 5.609284332688588e-05, 'epoch': 47.25}


                                                  
 47%|████▋     | 520/1100 [15:03<16:46,  1.73s/it]

{'eval_loss': 5.185304641723633, 'eval_runtime': 0.6045, 'eval_samples_per_second': 188.57, 'eval_steps_per_second': 13.233, 'epoch': 47.25}


 48%|████▊     | 530/1100 [15:20<17:10,  1.81s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.526, 'learning_rate': 5.5125725338491294e-05, 'epoch': 48.17}


                                                  
 48%|████▊     | 530/1100 [15:21<17:10,  1.81s/it]

{'eval_loss': 5.157038688659668, 'eval_runtime': 0.6045, 'eval_samples_per_second': 188.57, 'eval_steps_per_second': 13.233, 'epoch': 48.17}


 49%|████▉     | 540/1100 [15:37<17:48,  1.91s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.5672, 'learning_rate': 5.4158607350096714e-05, 'epoch': 49.08}


                                                  
 49%|████▉     | 540/1100 [15:38<17:48,  1.91s/it]

{'eval_loss': 5.180668354034424, 'eval_runtime': 0.6066, 'eval_samples_per_second': 187.948, 'eval_steps_per_second': 13.189, 'epoch': 49.08}


 50%|█████     | 550/1100 [15:53<14:26,  1.57s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.1567, 'learning_rate': 5.319148936170213e-05, 'epoch': 49.93}


                                                  
 50%|█████     | 550/1100 [15:54<14:26,  1.57s/it]

{'eval_loss': 5.068484783172607, 'eval_runtime': 0.6045, 'eval_samples_per_second': 188.57, 'eval_steps_per_second': 13.233, 'epoch': 49.93}


 51%|█████     | 560/1100 [16:11<14:12,  1.58s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.5136, 'learning_rate': 5.222437137330755e-05, 'epoch': 50.85}


                                                  
 51%|█████     | 560/1100 [16:11<14:12,  1.58s/it]

{'eval_loss': 5.0250678062438965, 'eval_runtime': 0.6035, 'eval_samples_per_second': 188.883, 'eval_steps_per_second': 13.255, 'epoch': 50.85}


 52%|█████▏    | 570/1100 [16:28<14:00,  1.59s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.4856, 'learning_rate': 5.125725338491296e-05, 'epoch': 51.76}


                                                  
 52%|█████▏    | 570/1100 [16:29<14:00,  1.59s/it]

{'eval_loss': 5.138667583465576, 'eval_runtime': 0.6045, 'eval_samples_per_second': 188.57, 'eval_steps_per_second': 13.233, 'epoch': 51.76}


 53%|█████▎    | 580/1100 [16:45<13:49,  1.59s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.4975, 'learning_rate': 5.029013539651838e-05, 'epoch': 52.68}


                                                  
 53%|█████▎    | 580/1100 [16:46<13:49,  1.59s/it]

{'eval_loss': 5.163283824920654, 'eval_runtime': 0.6045, 'eval_samples_per_second': 188.571, 'eval_steps_per_second': 13.233, 'epoch': 52.68}


 54%|█████▎    | 590/1100 [17:03<13:39,  1.61s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.5112, 'learning_rate': 4.932301740812379e-05, 'epoch': 53.59}


                                                  
 54%|█████▎    | 590/1100 [17:03<13:39,  1.61s/it]

{'eval_loss': 5.122529983520508, 'eval_runtime': 0.6045, 'eval_samples_per_second': 188.571, 'eval_steps_per_second': 13.233, 'epoch': 53.59}


 55%|█████▍    | 600/1100 [17:20<13:32,  1.62s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.4532, 'learning_rate': 4.835589941972921e-05, 'epoch': 54.51}


                                                  
 55%|█████▍    | 600/1100 [17:21<13:32,  1.62s/it]Saving model checkpoint to ./training_output/pretraining/TAPT\checkpoint-600
Configuration saved in ./training_output/pretraining/TAPT\checkpoint-600\TAPT_citation_intent_1\adapter_config.json
Module weights saved in ./training_output/pretraining/TAPT\checkpoint-600\TAPT_citation_intent_1\pytorch_adapter.bin
Configuration saved in ./training_output/pretraining/TAPT\checkpoint-600\TAPT_citation_intent_1\head_config.json


{'eval_loss': 5.132264614105225, 'eval_runtime': 0.6045, 'eval_samples_per_second': 188.57, 'eval_steps_per_second': 13.233, 'epoch': 54.51}


Module weights saved in ./training_output/pretraining/TAPT\checkpoint-600\TAPT_citation_intent_1\pytorch_model_head.bin
Configuration saved in ./training_output/pretraining/TAPT\checkpoint-600\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/pretraining/TAPT\checkpoint-600\TAPT_citation_intent_1\pytorch_model_head.bin
Configuration saved in ./training_output/pretraining/TAPT\checkpoint-600\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/pretraining/TAPT\checkpoint-600\TAPT_citation_intent_1\pytorch_model_head.bin
 55%|█████▌    | 610/1100 [17:38<13:33,  1.66s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.4679, 'learning_rate': 4.738878143133462e-05, 'epoch': 55.42}


                                                  
 55%|█████▌    | 610/1100 [17:39<13:33,  1.66s/it]

{'eval_loss': 5.050955295562744, 'eval_runtime': 0.6055, 'eval_samples_per_second': 188.259, 'eval_steps_per_second': 13.211, 'epoch': 55.42}


 56%|█████▋    | 620/1100 [17:56<13:27,  1.68s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.4386, 'learning_rate': 4.642166344294004e-05, 'epoch': 56.34}


                                                  
 56%|█████▋    | 620/1100 [17:56<13:27,  1.68s/it]

{'eval_loss': 5.141866683959961, 'eval_runtime': 0.6035, 'eval_samples_per_second': 188.883, 'eval_steps_per_second': 13.255, 'epoch': 56.34}


 57%|█████▋    | 630/1100 [18:13<13:34,  1.73s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.4129, 'learning_rate': 4.545454545454546e-05, 'epoch': 57.25}


                                                  
 57%|█████▋    | 630/1100 [18:14<13:34,  1.73s/it]

{'eval_loss': 5.272117614746094, 'eval_runtime': 0.6055, 'eval_samples_per_second': 188.259, 'eval_steps_per_second': 13.211, 'epoch': 57.25}


 58%|█████▊    | 640/1100 [18:30<13:50,  1.80s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.4449, 'learning_rate': 4.448742746615087e-05, 'epoch': 58.17}


                                                  
 58%|█████▊    | 640/1100 [18:31<13:50,  1.80s/it]

{'eval_loss': 5.019345283508301, 'eval_runtime': 0.6045, 'eval_samples_per_second': 188.571, 'eval_steps_per_second': 13.233, 'epoch': 58.17}


 59%|█████▉    | 650/1100 [18:48<14:18,  1.91s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.4128, 'learning_rate': 4.352030947775629e-05, 'epoch': 59.08}


                                                  
 59%|█████▉    | 650/1100 [18:48<14:18,  1.91s/it]

{'eval_loss': 4.986405849456787, 'eval_runtime': 0.6045, 'eval_samples_per_second': 188.571, 'eval_steps_per_second': 13.233, 'epoch': 59.08}


 60%|██████    | 660/1100 [19:04<11:32,  1.57s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.0201, 'learning_rate': 4.2553191489361704e-05, 'epoch': 59.93}


                                                  
 60%|██████    | 660/1100 [19:04<11:32,  1.57s/it]

{'eval_loss': 5.061737060546875, 'eval_runtime': 0.6045, 'eval_samples_per_second': 188.57, 'eval_steps_per_second': 13.233, 'epoch': 59.93}


 61%|██████    | 670/1100 [19:21<11:19,  1.58s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.3742, 'learning_rate': 4.1586073500967124e-05, 'epoch': 60.85}


                                                  
 61%|██████    | 670/1100 [19:22<11:19,  1.58s/it]

{'eval_loss': 5.071314811706543, 'eval_runtime': 0.6025, 'eval_samples_per_second': 189.197, 'eval_steps_per_second': 13.277, 'epoch': 60.85}


 62%|██████▏   | 680/1100 [19:38<11:05,  1.59s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.3724, 'learning_rate': 4.061895551257254e-05, 'epoch': 61.76}


                                                  
 62%|██████▏   | 680/1100 [19:39<11:05,  1.59s/it]

{'eval_loss': 4.8632612228393555, 'eval_runtime': 0.6045, 'eval_samples_per_second': 188.571, 'eval_steps_per_second': 13.233, 'epoch': 61.76}


 63%|██████▎   | 690/1100 [19:56<10:53,  1.59s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.3337, 'learning_rate': 3.965183752417796e-05, 'epoch': 62.68}


                                                  
 63%|██████▎   | 690/1100 [19:56<10:53,  1.59s/it]

{'eval_loss': 5.0124735832214355, 'eval_runtime': 0.6045, 'eval_samples_per_second': 188.57, 'eval_steps_per_second': 13.233, 'epoch': 62.68}


 64%|██████▎   | 700/1100 [20:13<10:42,  1.61s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.3898, 'learning_rate': 3.868471953578336e-05, 'epoch': 63.59}


                                                  
 64%|██████▎   | 700/1100 [20:14<10:42,  1.61s/it]Saving model checkpoint to ./training_output/pretraining/TAPT\checkpoint-700
Configuration saved in ./training_output/pretraining/TAPT\checkpoint-700\TAPT_citation_intent_1\adapter_config.json
Module weights saved in ./training_output/pretraining/TAPT\checkpoint-700\TAPT_citation_intent_1\pytorch_adapter.bin
Configuration saved in ./training_output/pretraining/TAPT\checkpoint-700\TAPT_citation_intent_1\head_config.json


{'eval_loss': 5.012201309204102, 'eval_runtime': 0.6055, 'eval_samples_per_second': 188.259, 'eval_steps_per_second': 13.211, 'epoch': 63.59}


Module weights saved in ./training_output/pretraining/TAPT\checkpoint-700\TAPT_citation_intent_1\pytorch_model_head.bin
Configuration saved in ./training_output/pretraining/TAPT\checkpoint-700\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/pretraining/TAPT\checkpoint-700\TAPT_citation_intent_1\pytorch_model_head.bin
Configuration saved in ./training_output/pretraining/TAPT\checkpoint-700\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/pretraining/TAPT\checkpoint-700\TAPT_citation_intent_1\pytorch_model_head.bin
 65%|██████▍   | 710/1100 [20:31<10:38,  1.64s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.3717, 'learning_rate': 3.771760154738878e-05, 'epoch': 64.51}


                                                  
 65%|██████▍   | 710/1100 [20:32<10:38,  1.64s/it]

{'eval_loss': 5.079471111297607, 'eval_runtime': 0.6045, 'eval_samples_per_second': 188.571, 'eval_steps_per_second': 13.233, 'epoch': 64.51}


 65%|██████▌   | 720/1100 [20:49<10:27,  1.65s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.3068, 'learning_rate': 3.6750483558994196e-05, 'epoch': 65.42}


                                                  
 65%|██████▌   | 720/1100 [20:49<10:27,  1.65s/it]

{'eval_loss': 5.015259265899658, 'eval_runtime': 0.6025, 'eval_samples_per_second': 189.197, 'eval_steps_per_second': 13.277, 'epoch': 65.42}


 66%|██████▋   | 730/1100 [21:06<10:23,  1.68s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.3573, 'learning_rate': 3.5783365570599616e-05, 'epoch': 66.34}


                                                  
 66%|██████▋   | 730/1100 [21:07<10:23,  1.68s/it]

{'eval_loss': 4.966453552246094, 'eval_runtime': 0.6025, 'eval_samples_per_second': 189.197, 'eval_steps_per_second': 13.277, 'epoch': 66.34}


 67%|██████▋   | 740/1100 [21:23<10:24,  1.73s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.3249, 'learning_rate': 3.481624758220503e-05, 'epoch': 67.25}


                                                  
 67%|██████▋   | 740/1100 [21:24<10:24,  1.73s/it]

{'eval_loss': 4.927013397216797, 'eval_runtime': 0.6035, 'eval_samples_per_second': 188.883, 'eval_steps_per_second': 13.255, 'epoch': 67.25}


 68%|██████▊   | 750/1100 [21:41<10:32,  1.81s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.296, 'learning_rate': 3.384912959381045e-05, 'epoch': 68.17}


                                                  
 68%|██████▊   | 750/1100 [21:41<10:32,  1.81s/it]

{'eval_loss': 4.948490619659424, 'eval_runtime': 0.6035, 'eval_samples_per_second': 188.883, 'eval_steps_per_second': 13.255, 'epoch': 68.17}


 69%|██████▉   | 760/1100 [21:58<10:49,  1.91s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.3288, 'learning_rate': 3.288201160541586e-05, 'epoch': 69.08}


                                                  
 69%|██████▉   | 760/1100 [21:59<10:49,  1.91s/it]

{'eval_loss': 5.00581169128418, 'eval_runtime': 0.6065, 'eval_samples_per_second': 187.948, 'eval_steps_per_second': 13.189, 'epoch': 69.08}


 70%|███████   | 770/1100 [22:14<08:40,  1.58s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 4.928, 'learning_rate': 3.191489361702128e-05, 'epoch': 69.93}


                                                  
 70%|███████   | 770/1100 [22:15<08:40,  1.58s/it]

{'eval_loss': 5.03261137008667, 'eval_runtime': 0.6055, 'eval_samples_per_second': 188.259, 'eval_steps_per_second': 13.211, 'epoch': 69.93}


 71%|███████   | 780/1100 [22:32<08:25,  1.58s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.3061, 'learning_rate': 3.0947775628626695e-05, 'epoch': 70.85}


                                                  
 71%|███████   | 780/1100 [22:32<08:25,  1.58s/it]

{'eval_loss': 4.979866027832031, 'eval_runtime': 0.6035, 'eval_samples_per_second': 188.883, 'eval_steps_per_second': 13.255, 'epoch': 70.85}


 72%|███████▏  | 790/1100 [22:49<08:11,  1.59s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.2671, 'learning_rate': 2.9980657640232108e-05, 'epoch': 71.76}


                                                  
 72%|███████▏  | 790/1100 [22:50<08:11,  1.59s/it]

{'eval_loss': 4.903280735015869, 'eval_runtime': 0.6035, 'eval_samples_per_second': 188.883, 'eval_steps_per_second': 13.255, 'epoch': 71.76}


 73%|███████▎  | 800/1100 [23:06<07:57,  1.59s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.3003, 'learning_rate': 2.9013539651837524e-05, 'epoch': 72.68}


                                                  
 73%|███████▎  | 800/1100 [23:07<07:57,  1.59s/it]Saving model checkpoint to ./training_output/pretraining/TAPT\checkpoint-800
Configuration saved in ./training_output/pretraining/TAPT\checkpoint-800\TAPT_citation_intent_1\adapter_config.json
Module weights saved in ./training_output/pretraining/TAPT\checkpoint-800\TAPT_citation_intent_1\pytorch_adapter.bin
Configuration saved in ./training_output/pretraining/TAPT\checkpoint-800\TAPT_citation_intent_1\head_config.json


{'eval_loss': 4.884580612182617, 'eval_runtime': 0.6055, 'eval_samples_per_second': 188.259, 'eval_steps_per_second': 13.211, 'epoch': 72.68}


Module weights saved in ./training_output/pretraining/TAPT\checkpoint-800\TAPT_citation_intent_1\pytorch_model_head.bin
Configuration saved in ./training_output/pretraining/TAPT\checkpoint-800\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/pretraining/TAPT\checkpoint-800\TAPT_citation_intent_1\pytorch_model_head.bin
Configuration saved in ./training_output/pretraining/TAPT\checkpoint-800\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/pretraining/TAPT\checkpoint-800\TAPT_citation_intent_1\pytorch_model_head.bin
 74%|███████▎  | 810/1100 [23:25<07:49,  1.62s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.2565, 'learning_rate': 2.804642166344294e-05, 'epoch': 73.59}


                                                  
 74%|███████▎  | 810/1100 [23:25<07:49,  1.62s/it]

{'eval_loss': 4.969282627105713, 'eval_runtime': 0.6045, 'eval_samples_per_second': 188.571, 'eval_steps_per_second': 13.233, 'epoch': 73.59}


 75%|███████▍  | 820/1100 [23:42<07:34,  1.62s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.2686, 'learning_rate': 2.7079303675048357e-05, 'epoch': 74.51}


                                                  
 75%|███████▍  | 820/1100 [23:43<07:34,  1.62s/it]

{'eval_loss': 4.915503978729248, 'eval_runtime': 0.6005, 'eval_samples_per_second': 189.828, 'eval_steps_per_second': 13.321, 'epoch': 74.51}


 75%|███████▌  | 830/1100 [23:59<07:25,  1.65s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.2693, 'learning_rate': 2.6112185686653773e-05, 'epoch': 75.42}


                                                  
 75%|███████▌  | 830/1100 [24:00<07:25,  1.65s/it]

{'eval_loss': 4.888894081115723, 'eval_runtime': 0.6066, 'eval_samples_per_second': 187.948, 'eval_steps_per_second': 13.189, 'epoch': 75.42}


 76%|███████▋  | 840/1100 [24:17<07:18,  1.69s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.2034, 'learning_rate': 2.514506769825919e-05, 'epoch': 76.34}


                                                  
 76%|███████▋  | 840/1100 [24:17<07:18,  1.69s/it]

{'eval_loss': 4.9335713386535645, 'eval_runtime': 0.6035, 'eval_samples_per_second': 188.883, 'eval_steps_per_second': 13.255, 'epoch': 76.34}


 77%|███████▋  | 850/1100 [24:34<07:13,  1.73s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.229, 'learning_rate': 2.4177949709864606e-05, 'epoch': 77.25}


                                                  
 77%|███████▋  | 850/1100 [24:35<07:13,  1.73s/it]

{'eval_loss': 4.960597038269043, 'eval_runtime': 0.6035, 'eval_samples_per_second': 188.883, 'eval_steps_per_second': 13.255, 'epoch': 77.25}


 78%|███████▊  | 860/1100 [24:51<07:13,  1.81s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.2308, 'learning_rate': 2.321083172147002e-05, 'epoch': 78.17}


                                                  
 78%|███████▊  | 860/1100 [24:52<07:13,  1.81s/it]

{'eval_loss': 4.857590198516846, 'eval_runtime': 0.6025, 'eval_samples_per_second': 189.197, 'eval_steps_per_second': 13.277, 'epoch': 78.17}


 79%|███████▉  | 870/1100 [25:09<07:19,  1.91s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.2726, 'learning_rate': 2.2243713733075436e-05, 'epoch': 79.08}


                                                  
 79%|███████▉  | 870/1100 [25:09<07:19,  1.91s/it]

{'eval_loss': 4.798332691192627, 'eval_runtime': 0.6035, 'eval_samples_per_second': 188.883, 'eval_steps_per_second': 13.255, 'epoch': 79.08}


 80%|████████  | 880/1100 [25:25<05:46,  1.58s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 4.8661, 'learning_rate': 2.1276595744680852e-05, 'epoch': 79.93}


                                                  
 80%|████████  | 880/1100 [25:25<05:46,  1.58s/it]

{'eval_loss': 5.042816162109375, 'eval_runtime': 0.6015, 'eval_samples_per_second': 189.512, 'eval_steps_per_second': 13.299, 'epoch': 79.93}


 81%|████████  | 890/1100 [25:42<05:31,  1.58s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.2539, 'learning_rate': 2.030947775628627e-05, 'epoch': 80.85}


                                                  
 81%|████████  | 890/1100 [25:43<05:31,  1.58s/it]

{'eval_loss': 4.855782508850098, 'eval_runtime': 0.6035, 'eval_samples_per_second': 188.883, 'eval_steps_per_second': 13.255, 'epoch': 80.85}


 82%|████████▏ | 900/1100 [26:00<05:17,  1.59s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.2388, 'learning_rate': 1.934235976789168e-05, 'epoch': 81.76}


                                                  
 82%|████████▏ | 900/1100 [26:00<05:17,  1.59s/it]Saving model checkpoint to ./training_output/pretraining/TAPT\checkpoint-900
Configuration saved in ./training_output/pretraining/TAPT\checkpoint-900\TAPT_citation_intent_1\adapter_config.json
Module weights saved in ./training_output/pretraining/TAPT\checkpoint-900\TAPT_citation_intent_1\pytorch_adapter.bin
Configuration saved in ./training_output/pretraining/TAPT\checkpoint-900\TAPT_citation_intent_1\head_config.json


{'eval_loss': 4.8832831382751465, 'eval_runtime': 0.6045, 'eval_samples_per_second': 188.571, 'eval_steps_per_second': 13.233, 'epoch': 81.76}


Module weights saved in ./training_output/pretraining/TAPT\checkpoint-900\TAPT_citation_intent_1\pytorch_model_head.bin
Configuration saved in ./training_output/pretraining/TAPT\checkpoint-900\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/pretraining/TAPT\checkpoint-900\TAPT_citation_intent_1\pytorch_model_head.bin
Configuration saved in ./training_output/pretraining/TAPT\checkpoint-900\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/pretraining/TAPT\checkpoint-900\TAPT_citation_intent_1\pytorch_model_head.bin
 83%|████████▎ | 910/1100 [26:18<05:04,  1.61s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.25, 'learning_rate': 1.8375241779497098e-05, 'epoch': 82.68}


                                                  
 83%|████████▎ | 910/1100 [26:18<05:04,  1.61s/it]

{'eval_loss': 4.9596848487854, 'eval_runtime': 0.6055, 'eval_samples_per_second': 188.259, 'eval_steps_per_second': 13.211, 'epoch': 82.68}


 84%|████████▎ | 920/1100 [26:35<04:49,  1.61s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.1925, 'learning_rate': 1.7408123791102515e-05, 'epoch': 83.59}


                                                  
 84%|████████▎ | 920/1100 [26:36<04:49,  1.61s/it]

{'eval_loss': 4.903000831604004, 'eval_runtime': 0.6055, 'eval_samples_per_second': 188.259, 'eval_steps_per_second': 13.211, 'epoch': 83.59}


 85%|████████▍ | 930/1100 [26:52<04:36,  1.62s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.2353, 'learning_rate': 1.644100580270793e-05, 'epoch': 84.51}


                                                  
 85%|████████▍ | 930/1100 [26:53<04:36,  1.62s/it]

{'eval_loss': 4.862441062927246, 'eval_runtime': 0.6035, 'eval_samples_per_second': 188.883, 'eval_steps_per_second': 13.255, 'epoch': 84.51}


 85%|████████▌ | 940/1100 [27:10<04:23,  1.65s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.2378, 'learning_rate': 1.5473887814313347e-05, 'epoch': 85.42}


                                                  
 85%|████████▌ | 940/1100 [27:10<04:23,  1.65s/it]

{'eval_loss': 4.911202430725098, 'eval_runtime': 0.6035, 'eval_samples_per_second': 188.883, 'eval_steps_per_second': 13.255, 'epoch': 85.42}


 86%|████████▋ | 950/1100 [27:27<04:12,  1.68s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.1937, 'learning_rate': 1.4506769825918762e-05, 'epoch': 86.34}


                                                  
 86%|████████▋ | 950/1100 [27:28<04:12,  1.68s/it]

{'eval_loss': 4.838657379150391, 'eval_runtime': 0.6035, 'eval_samples_per_second': 188.883, 'eval_steps_per_second': 13.255, 'epoch': 86.34}


 87%|████████▋ | 960/1100 [27:44<04:02,  1.73s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.2404, 'learning_rate': 1.3539651837524179e-05, 'epoch': 87.25}


                                                  
 87%|████████▋ | 960/1100 [27:45<04:02,  1.73s/it]

{'eval_loss': 4.936162948608398, 'eval_runtime': 0.6035, 'eval_samples_per_second': 188.883, 'eval_steps_per_second': 13.255, 'epoch': 87.25}


 88%|████████▊ | 970/1100 [28:02<03:54,  1.80s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.2021, 'learning_rate': 1.2572533849129595e-05, 'epoch': 88.17}


                                                  
 88%|████████▊ | 970/1100 [28:02<03:54,  1.80s/it]

{'eval_loss': 4.921852111816406, 'eval_runtime': 0.6035, 'eval_samples_per_second': 188.883, 'eval_steps_per_second': 13.255, 'epoch': 88.17}


 89%|████████▉ | 980/1100 [28:19<03:48,  1.91s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.2321, 'learning_rate': 1.160541586073501e-05, 'epoch': 89.08}


                                                  
 89%|████████▉ | 980/1100 [28:20<03:48,  1.91s/it]

{'eval_loss': 4.830608367919922, 'eval_runtime': 0.6045, 'eval_samples_per_second': 188.571, 'eval_steps_per_second': 13.233, 'epoch': 89.08}


 90%|█████████ | 990/1100 [28:35<02:52,  1.57s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 4.8458, 'learning_rate': 1.0638297872340426e-05, 'epoch': 89.93}


                                                  
 90%|█████████ | 990/1100 [28:36<02:52,  1.57s/it]

{'eval_loss': 4.824248313903809, 'eval_runtime': 0.6035, 'eval_samples_per_second': 188.883, 'eval_steps_per_second': 13.255, 'epoch': 89.93}


 91%|█████████ | 1000/1100 [28:53<02:37,  1.58s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.2182, 'learning_rate': 9.67117988394584e-06, 'epoch': 90.85}


                                                   
 91%|█████████ | 1000/1100 [28:53<02:37,  1.58s/it]Saving model checkpoint to ./training_output/pretraining/TAPT\checkpoint-1000
Configuration saved in ./training_output/pretraining/TAPT\checkpoint-1000\TAPT_citation_intent_1\adapter_config.json
Module weights saved in ./training_output/pretraining/TAPT\checkpoint-1000\TAPT_citation_intent_1\pytorch_adapter.bin
Configuration saved in ./training_output/pretraining/TAPT\checkpoint-1000\TAPT_citation_intent_1\head_config.json


{'eval_loss': 4.979782581329346, 'eval_runtime': 0.6045, 'eval_samples_per_second': 188.571, 'eval_steps_per_second': 13.233, 'epoch': 90.85}


Module weights saved in ./training_output/pretraining/TAPT\checkpoint-1000\TAPT_citation_intent_1\pytorch_model_head.bin
Configuration saved in ./training_output/pretraining/TAPT\checkpoint-1000\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/pretraining/TAPT\checkpoint-1000\TAPT_citation_intent_1\pytorch_model_head.bin
Configuration saved in ./training_output/pretraining/TAPT\checkpoint-1000\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/pretraining/TAPT\checkpoint-1000\TAPT_citation_intent_1\pytorch_model_head.bin
 92%|█████████▏| 1010/1100 [29:11<02:23,  1.60s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.2179, 'learning_rate': 8.704061895551257e-06, 'epoch': 91.76}


                                                   
 92%|█████████▏| 1010/1100 [29:11<02:23,  1.60s/it]

{'eval_loss': 4.942434310913086, 'eval_runtime': 0.6045, 'eval_samples_per_second': 188.571, 'eval_steps_per_second': 13.233, 'epoch': 91.76}


 93%|█████████▎| 1020/1100 [29:28<02:07,  1.60s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.2121, 'learning_rate': 7.736943907156674e-06, 'epoch': 92.68}


                                                   
 93%|█████████▎| 1020/1100 [29:29<02:07,  1.60s/it]

{'eval_loss': 4.882311820983887, 'eval_runtime': 0.6045, 'eval_samples_per_second': 188.571, 'eval_steps_per_second': 13.233, 'epoch': 92.68}


 94%|█████████▎| 1030/1100 [29:46<01:52,  1.60s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.1583, 'learning_rate': 6.769825918762089e-06, 'epoch': 93.59}


                                                   
 94%|█████████▎| 1030/1100 [29:46<01:52,  1.60s/it]

{'eval_loss': 4.7624077796936035, 'eval_runtime': 0.6055, 'eval_samples_per_second': 188.259, 'eval_steps_per_second': 13.211, 'epoch': 93.59}


 95%|█████████▍| 1040/1100 [30:03<01:37,  1.62s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.2149, 'learning_rate': 5.802707930367505e-06, 'epoch': 94.51}


                                                   
 95%|█████████▍| 1040/1100 [30:03<01:37,  1.62s/it]

{'eval_loss': 4.870728969573975, 'eval_runtime': 0.6035, 'eval_samples_per_second': 188.883, 'eval_steps_per_second': 13.255, 'epoch': 94.51}


 95%|█████████▌| 1050/1100 [30:20<01:22,  1.65s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.2064, 'learning_rate': 4.83558994197292e-06, 'epoch': 95.42}


                                                   
 95%|█████████▌| 1050/1100 [30:21<01:22,  1.65s/it]

{'eval_loss': 4.860422134399414, 'eval_runtime': 0.6045, 'eval_samples_per_second': 188.57, 'eval_steps_per_second': 13.233, 'epoch': 95.42}


 96%|█████████▋| 1060/1100 [30:38<01:07,  1.68s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.1244, 'learning_rate': 3.868471953578337e-06, 'epoch': 96.34}


                                                   
 96%|█████████▋| 1060/1100 [30:38<01:07,  1.68s/it]

{'eval_loss': 4.741482257843018, 'eval_runtime': 0.6025, 'eval_samples_per_second': 189.197, 'eval_steps_per_second': 13.277, 'epoch': 96.34}


 97%|█████████▋| 1070/1100 [30:55<00:52,  1.73s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.2283, 'learning_rate': 2.9013539651837524e-06, 'epoch': 97.25}


                                                   
 97%|█████████▋| 1070/1100 [30:55<00:52,  1.73s/it]

{'eval_loss': 4.942307949066162, 'eval_runtime': 0.6045, 'eval_samples_per_second': 188.571, 'eval_steps_per_second': 13.233, 'epoch': 97.25}


 98%|█████████▊| 1080/1100 [31:12<00:36,  1.81s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.1806, 'learning_rate': 1.9342359767891684e-06, 'epoch': 98.17}


                                                   
 98%|█████████▊| 1080/1100 [31:13<00:36,  1.81s/it]

{'eval_loss': 4.922201633453369, 'eval_runtime': 0.6045, 'eval_samples_per_second': 188.571, 'eval_steps_per_second': 13.233, 'epoch': 98.17}


 99%|█████████▉| 1090/1100 [31:29<00:19,  1.91s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 5.1863, 'learning_rate': 9.671179883945842e-07, 'epoch': 99.08}


                                                   
 99%|█████████▉| 1090/1100 [31:30<00:19,  1.91s/it]

{'eval_loss': 4.856062889099121, 'eval_runtime': 0.6066, 'eval_samples_per_second': 187.948, 'eval_steps_per_second': 13.189, 'epoch': 99.08}


100%|██████████| 1100/1100 [31:46<00:00,  1.58s/it]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 4.8393, 'learning_rate': 0.0, 'epoch': 99.93}


                                                   
100%|██████████| 1100/1100 [31:46<00:00,  1.58s/it]Saving model checkpoint to ./training_output/pretraining/TAPT\checkpoint-1100
Configuration saved in ./training_output/pretraining/TAPT\checkpoint-1100\TAPT_citation_intent_1\adapter_config.json
Module weights saved in ./training_output/pretraining/TAPT\checkpoint-1100\TAPT_citation_intent_1\pytorch_adapter.bin
Configuration saved in ./training_output/pretraining/TAPT\checkpoint-1100\TAPT_citation_intent_1\head_config.json


{'eval_loss': 4.795557498931885, 'eval_runtime': 0.6055, 'eval_samples_per_second': 188.259, 'eval_steps_per_second': 13.211, 'epoch': 99.93}


Module weights saved in ./training_output/pretraining/TAPT\checkpoint-1100\TAPT_citation_intent_1\pytorch_model_head.bin
Configuration saved in ./training_output/pretraining/TAPT\checkpoint-1100\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/pretraining/TAPT\checkpoint-1100\TAPT_citation_intent_1\pytorch_model_head.bin
Configuration saved in ./training_output/pretraining/TAPT\checkpoint-1100\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/pretraining/TAPT\checkpoint-1100\TAPT_citation_intent_1\pytorch_model_head.bin


Training completed. Do not forget to share your model on huggingface.co/models =)


100%|██████████| 1100/1100 [31:47<00:00,  1.73s/it]
***** Running Evaluation *****
  Num examples = 139
  Batch size = 16


{'train_runtime': 1907.7905, 'train_samples_per_second': 88.479, 'train_steps_per_second': 0.577, 'train_loss': 6.2635156735506925, 'epoch': 99.93}


100%|██████████| 9/9 [00:00<00:00, 12.45it/s]
Configuration saved in ./training_output/pretraining/TAPT\TAPT_citation_intent_1\adapter_config.json
Module weights saved in ./training_output/pretraining/TAPT\TAPT_citation_intent_1\pytorch_adapter.bin
Configuration saved in ./training_output/pretraining/TAPT\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/pretraining/TAPT\TAPT_citation_intent_1\pytorch_model_head.bin


Fine Tuning Models

In [13]:
from datasets import load_metric
metric = load_metric('f1')

def compute_metric(EvalPrediction):
  
  logits, labels = EvalPrediction
  predictions = np.argmax(logits, axis=-1)
  return metric.compute(predictions=predictions, references=labels, average= 'macro')

In [10]:
def finetuning_loop(num_models, training_args, dataset, adapter_name, num_labels, load_adapter = False, adapter_dir = 'Path'):
    """The loop for finetuning num_models number of models to account for run2run variance

    Args:
        num_models (int): Number of models to loop through
        training_args (transformers.TrainingArguments): The arguments to pass to the trainer
        dataset (dataset): The dataset to train on
        adapter_name (str): Name of the adapter to create/load
        num_labels (int): Number of labels for classification task
        load_adapter (bool, optional): Whether to load the adapter based on adapter_name. Defaults to False.
        adapter_dir (str, optional): Path to the adapter to load when load_adapter is True. Defaults to 'Path'.
    """

    for i in range(num_models):
        adapter = f"{adapter_name}_{i}"
        model = model_init(adapter_name = adapter, num_lables = num_labels, pretraining=False, load_adapter = load_adapter, adapter_dir = f"{adapter_dir}/{adapter}")
        
        writer = SummaryWriter(log_dir= f'runs/{adapter}')
        writer = TensorBoardCallback(writer)

        trainer = AdapterTrainer(
            model=model,
            args=training_args,
            train_dataset=dataset["train"],
            eval_dataset=dataset["validation"],
            callbacks=[writer],
            compute_metrics = compute_metric 
        )
        
        trainer.train()
        
        f = open(f"{training_args.output_dir}/evaulations.txt", "a")
        f.write(adapter)
        f.write(json.dumps(trainer.evaluate(dataset['test'])))
        f.write('\n')
        f.close()
        
        # model.save_pretrained(f"{adapter_name}")
        model.save_all_adapters(training_args.output_dir)
        
        trainer.remove_callback(writer)

DAPT Finetuning

In [17]:
# training_args = TrainingArguments(
#     learning_rate=1e-4,
#     num_train_epochs=50,
#     per_device_train_batch_size=32,
#     per_device_eval_batch_size=32,
#     logging_steps=100,
#     output_dir="./training_output/finetuning/DAPT",
#     overwrite_output_dir=True,
#     # The next line is important to ensure the dataset labels are properly passed to the model
#     remove_unused_columns=False,
#     evaluation_strategy = 'epoch',
#     # load_best_model_at_end = True,
#     save_steps = 100
# )

In [18]:
# finetuning_loop(num_models = 5, 
#                  training_args = training_args, 
#                  dataset = scierc_dataset_finetuning,  
#                  adapter_name = "DAPT_sci-erc",
#                  load_adapter = True)

DAPT+TAPT Finetuning

In [19]:
# training_args = TrainingArguments(
#     learning_rate=1e-4,
#     num_train_epochs=50,
#     per_device_train_batch_size=32,
#     per_device_eval_batch_size=32,
#     logging_steps=10,
#     output_dir="./training_output/finetuning/DAPT_TAPT",
#     overwrite_output_dir=True,
#     # The next line is important to ensure the dataset labels are properly passed to the model
#     remove_unused_columns=False,
#     evaluation_strategy = 'epoch',
#     # load_best_model_at_end = True,
#     save_steps = 100
# )

In [20]:
# finetuning_loop(num_models = 5, 
#                  training_args = training_args, 
#                  dataset = scierc_dataset_finetuning,  
#                  adapter_name = "DAPT_TAPT_sci-erc",
#                  load_adapter = True,
#                  adapter_dir = "./training_output/pretraining/DAPT_TAPT",
#                  num_labels = num_of_labels)

TAPT Finetuning

In [21]:
# training_args = TrainingArguments(
#     learning_rate=2e-5,
#     num_train_epochs=50,
#     per_device_train_batch_size=16,
#     per_device_eval_batch_size=16,
#     logging_steps=100,
#     output_dir="./training_output/finetuning/TAPT",
#     overwrite_output_dir=True,
#     # The next line is important to ensure the dataset labels are properly passed to the model
#     remove_unused_columns=False,
#     evaluation_strategy = 'epoch',
#     # load_best_model_at_end = True,
#     save_steps = 100,
#     lr_scheduler_type = 'constant',
#     log_level  = 'error'
    
# )

training_args = TrainingArguments(
    learning_rate=1e-4,
    num_train_epochs=75,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    logging_steps=50,
    output_dir="./training_output/finetuning/TAPT",
    overwrite_output_dir=True,
    # The next line is important to ensure the dataset labels are properly passed to the model
    remove_unused_columns=False,
    evaluation_strategy = 'epoch',
    # load_best_model_at_end = True,
    save_steps = 100,
    # lr_scheduler_type = 'constant',
)

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).


In [22]:
finetuning_loop(num_models = 2, 
                 training_args = training_args, 
                 dataset = dataset_finetuning,  
                 adapter_name = f"TAPT_{dataset_name}",
                 load_adapter = True,
                 adapter_dir = "./training_output/pretraining/TAPT",
                 num_labels = num_of_labels)

 58%|█████▊    | 4600/7950 [10:40<08:10,  6.83it/s]Saving model checkpoint to ./training_output/finetuning/TAPT\checkpoint-4600
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-4600\TAPT_citation_intent_1\adapter_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-4600\TAPT_citation_intent_1\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-4600\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-4600\TAPT_citation_intent_1\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-4600\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-4600\TAPT_citation_intent_1\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-4600\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\ch

{'loss': 0.132, 'learning_rate': 4.213836477987422e-05, 'epoch': 43.4}


 59%|█████▊    | 4651/7950 [10:48<08:08,  6.76it/s]

{'loss': 0.0995, 'learning_rate': 4.150943396226415e-05, 'epoch': 43.87}


 59%|█████▊    | 4663/7950 [10:49<08:07,  6.74it/s]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16
                                                   
 59%|█████▊    | 4665/7950 [10:50<14:18,  3.83it/s]

{'eval_loss': 1.5342886447906494, 'eval_f1': 0.6649743635037753, 'eval_runtime': 0.5465, 'eval_samples_per_second': 208.602, 'eval_steps_per_second': 14.639, 'epoch': 44.0}


 59%|█████▉    | 4700/7950 [10:55<07:48,  6.94it/s]Saving model checkpoint to ./training_output/finetuning/TAPT\checkpoint-4700
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-4700\TAPT_citation_intent_1\adapter_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-4700\TAPT_citation_intent_1\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-4700\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-4700\TAPT_citation_intent_1\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-4700\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-4700\TAPT_citation_intent_1\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-4700\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\ch

{'loss': 0.0642, 'learning_rate': 4.088050314465409e-05, 'epoch': 44.34}


 60%|█████▉    | 4751/7950 [11:03<07:48,  6.83it/s]

{'loss': 0.0817, 'learning_rate': 4.025157232704403e-05, 'epoch': 44.81}


 60%|█████▉    | 4769/7950 [11:05<07:34,  6.99it/s]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16
                                                   
 60%|██████    | 4771/7950 [11:06<13:33,  3.91it/s]

{'eval_loss': 1.7784576416015625, 'eval_f1': 0.6872937108231226, 'eval_runtime': 0.5375, 'eval_samples_per_second': 212.098, 'eval_steps_per_second': 14.884, 'epoch': 45.0}


 60%|██████    | 4800/7950 [11:10<07:30,  6.99it/s]Saving model checkpoint to ./training_output/finetuning/TAPT\checkpoint-4800
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-4800\TAPT_citation_intent_1\adapter_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-4800\TAPT_citation_intent_1\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-4800\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-4800\TAPT_citation_intent_1\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-4800\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-4800\TAPT_citation_intent_1\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-4800\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\ch

{'loss': 0.0608, 'learning_rate': 3.962264150943397e-05, 'epoch': 45.28}


 61%|██████    | 4851/7950 [11:18<07:32,  6.85it/s]

{'loss': 0.0667, 'learning_rate': 3.8993710691823904e-05, 'epoch': 45.75}


 61%|██████▏   | 4875/7950 [11:21<07:19,  6.99it/s]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16
                                                   
 61%|██████▏   | 4877/7950 [11:22<13:08,  3.90it/s]

{'eval_loss': 1.4779257774353027, 'eval_f1': 0.6778050778050778, 'eval_runtime': 0.5355, 'eval_samples_per_second': 212.891, 'eval_steps_per_second': 14.94, 'epoch': 46.0}


 62%|██████▏   | 4900/7950 [11:25<07:17,  6.98it/s]Saving model checkpoint to ./training_output/finetuning/TAPT\checkpoint-4900
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-4900\TAPT_citation_intent_1\adapter_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-4900\TAPT_citation_intent_1\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-4900\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-4900\TAPT_citation_intent_1\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-4900\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-4900\TAPT_citation_intent_1\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-4900\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\ch

{'loss': 0.0723, 'learning_rate': 3.836477987421384e-05, 'epoch': 46.23}


 62%|██████▏   | 4951/7950 [11:33<07:27,  6.70it/s]

{'loss': 0.0615, 'learning_rate': 3.7735849056603776e-05, 'epoch': 46.7}


 63%|██████▎   | 4981/7950 [11:37<07:13,  6.85it/s]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16
                                                   
 63%|██████▎   | 4983/7950 [11:38<12:58,  3.81it/s]

{'eval_loss': 1.5940022468566895, 'eval_f1': 0.6963534963534963, 'eval_runtime': 0.5535, 'eval_samples_per_second': 205.961, 'eval_steps_per_second': 14.453, 'epoch': 47.0}


 63%|██████▎   | 5000/7950 [11:40<07:12,  6.81it/s]Saving model checkpoint to ./training_output/finetuning/TAPT\checkpoint-5000
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-5000\TAPT_citation_intent_1\adapter_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-5000\TAPT_citation_intent_1\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-5000\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-5000\TAPT_citation_intent_1\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-5000\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-5000\TAPT_citation_intent_1\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-5000\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\ch

{'loss': 0.0674, 'learning_rate': 3.710691823899371e-05, 'epoch': 47.17}


 64%|██████▎   | 5051/7950 [11:48<07:09,  6.74it/s]

{'loss': 0.1065, 'learning_rate': 3.647798742138365e-05, 'epoch': 47.64}


 64%|██████▍   | 5087/7950 [11:53<06:58,  6.84it/s]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16
                                                   
 64%|██████▍   | 5089/7950 [11:54<12:28,  3.82it/s]

{'eval_loss': 1.4433470964431763, 'eval_f1': 0.6989987595250753, 'eval_runtime': 0.5465, 'eval_samples_per_second': 208.602, 'eval_steps_per_second': 14.639, 'epoch': 48.0}


 64%|██████▍   | 5100/7950 [11:56<07:03,  6.73it/s]Saving model checkpoint to ./training_output/finetuning/TAPT\checkpoint-5100
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-5100\TAPT_citation_intent_1\adapter_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-5100\TAPT_citation_intent_1\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-5100\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-5100\TAPT_citation_intent_1\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-5100\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-5100\TAPT_citation_intent_1\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-5100\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\ch

{'loss': 0.0425, 'learning_rate': 3.5849056603773584e-05, 'epoch': 48.11}


 65%|██████▍   | 5151/7950 [12:03<06:56,  6.72it/s]

{'loss': 0.0472, 'learning_rate': 3.522012578616352e-05, 'epoch': 48.58}


 65%|██████▌   | 5193/7950 [12:09<06:41,  6.87it/s]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16
                                                   
 65%|██████▌   | 5195/7950 [12:10<11:56,  3.85it/s]

{'eval_loss': 1.8267033100128174, 'eval_f1': 0.6898371633665752, 'eval_runtime': 0.5435, 'eval_samples_per_second': 209.754, 'eval_steps_per_second': 14.72, 'epoch': 49.0}


 65%|██████▌   | 5200/7950 [12:11<07:47,  5.88it/s]Saving model checkpoint to ./training_output/finetuning/TAPT\checkpoint-5200
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-5200\TAPT_citation_intent_1\adapter_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-5200\TAPT_citation_intent_1\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-5200\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-5200\TAPT_citation_intent_1\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-5200\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-5200\TAPT_citation_intent_1\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-5200\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\ch

{'loss': 0.0906, 'learning_rate': 3.4591194968553456e-05, 'epoch': 49.06}


 66%|██████▌   | 5251/7950 [12:18<06:39,  6.75it/s]

{'loss': 0.0727, 'learning_rate': 3.39622641509434e-05, 'epoch': 49.53}


 67%|██████▋   | 5300/7950 [12:25<06:26,  6.86it/s]Saving model checkpoint to ./training_output/finetuning/TAPT\checkpoint-5300
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-5300\TAPT_citation_intent_1\adapter_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-5300\TAPT_citation_intent_1\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-5300\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-5300\TAPT_citation_intent_1\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-5300\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-5300\TAPT_citation_intent_1\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-5300\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\ch

{'loss': 0.0779, 'learning_rate': 3.3333333333333335e-05, 'epoch': 50.0}


                                                   
 67%|██████▋   | 5301/7950 [12:26<12:19,  3.58it/s]

{'eval_loss': 1.6775009632110596, 'eval_f1': 0.6682775821652426, 'eval_runtime': 0.5445, 'eval_samples_per_second': 209.369, 'eval_steps_per_second': 14.693, 'epoch': 50.0}


 67%|██████▋   | 5351/7950 [12:34<06:25,  6.73it/s]

{'loss': 0.0713, 'learning_rate': 3.270440251572327e-05, 'epoch': 50.47}


 68%|██████▊   | 5400/7950 [12:41<06:11,  6.86it/s]Saving model checkpoint to ./training_output/finetuning/TAPT\checkpoint-5400
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-5400\TAPT_citation_intent_1\adapter_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-5400\TAPT_citation_intent_1\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-5400\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-5400\TAPT_citation_intent_1\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-5400\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-5400\TAPT_citation_intent_1\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-5400\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\ch

{'loss': 0.0814, 'learning_rate': 3.207547169811321e-05, 'epoch': 50.94}


 68%|██████▊   | 5405/7950 [12:42<06:26,  6.59it/s]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16
                                                   
 68%|██████▊   | 5407/7950 [12:42<11:14,  3.77it/s]

{'eval_loss': 1.772558331489563, 'eval_f1': 0.7066391941391941, 'eval_runtime': 0.5495, 'eval_samples_per_second': 207.462, 'eval_steps_per_second': 14.559, 'epoch': 51.0}


 69%|██████▊   | 5451/7950 [12:49<06:11,  6.73it/s]

{'loss': 0.0706, 'learning_rate': 3.144654088050314e-05, 'epoch': 51.42}


 69%|██████▉   | 5500/7950 [12:56<05:59,  6.81it/s]Saving model checkpoint to ./training_output/finetuning/TAPT\checkpoint-5500
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-5500\TAPT_citation_intent_1\adapter_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-5500\TAPT_citation_intent_1\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-5500\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-5500\TAPT_citation_intent_1\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-5500\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-5500\TAPT_citation_intent_1\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-5500\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\ch

{'loss': 0.0612, 'learning_rate': 3.081761006289308e-05, 'epoch': 51.89}


 69%|██████▉   | 5511/7950 [12:58<05:59,  6.79it/s]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16
                                                   
 69%|██████▉   | 5513/7950 [12:58<10:34,  3.84it/s]

{'eval_loss': 1.701988935470581, 'eval_f1': 0.6634800384800384, 'eval_runtime': 0.5435, 'eval_samples_per_second': 209.754, 'eval_steps_per_second': 14.72, 'epoch': 52.0}


 70%|██████▉   | 5551/7950 [13:04<05:56,  6.72it/s]

{'loss': 0.0576, 'learning_rate': 3.018867924528302e-05, 'epoch': 52.36}


 70%|███████   | 5600/7950 [13:11<05:42,  6.86it/s]Saving model checkpoint to ./training_output/finetuning/TAPT\checkpoint-5600
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-5600\TAPT_citation_intent_1\adapter_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-5600\TAPT_citation_intent_1\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-5600\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-5600\TAPT_citation_intent_1\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-5600\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-5600\TAPT_citation_intent_1\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-5600\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\ch

{'loss': 0.0585, 'learning_rate': 2.9559748427672958e-05, 'epoch': 52.83}


 71%|███████   | 5617/7950 [13:14<05:42,  6.81it/s]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16
                                                   
 71%|███████   | 5619/7950 [13:14<10:06,  3.84it/s]

{'eval_loss': 1.6153020858764648, 'eval_f1': 0.7184704184704184, 'eval_runtime': 0.5425, 'eval_samples_per_second': 210.141, 'eval_steps_per_second': 14.747, 'epoch': 53.0}


 71%|███████   | 5651/7950 [13:19<05:40,  6.74it/s]

{'loss': 0.0719, 'learning_rate': 2.8930817610062894e-05, 'epoch': 53.3}


 72%|███████▏  | 5700/7950 [13:26<05:28,  6.84it/s]Saving model checkpoint to ./training_output/finetuning/TAPT\checkpoint-5700
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-5700\TAPT_citation_intent_1\adapter_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-5700\TAPT_citation_intent_1\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-5700\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-5700\TAPT_citation_intent_1\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-5700\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-5700\TAPT_citation_intent_1\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-5700\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\ch

{'loss': 0.0706, 'learning_rate': 2.830188679245283e-05, 'epoch': 53.77}


 72%|███████▏  | 5723/7950 [13:30<05:27,  6.80it/s]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16
                                                   
 72%|███████▏  | 5725/7950 [13:31<09:42,  3.82it/s]

{'eval_loss': 1.6964467763900757, 'eval_f1': 0.7018011750154608, 'eval_runtime': 0.5475, 'eval_samples_per_second': 208.22, 'eval_steps_per_second': 14.612, 'epoch': 54.0}


 72%|███████▏  | 5751/7950 [13:34<05:27,  6.70it/s]

{'loss': 0.0562, 'learning_rate': 2.767295597484277e-05, 'epoch': 54.25}


 73%|███████▎  | 5800/7950 [13:42<05:13,  6.85it/s]Saving model checkpoint to ./training_output/finetuning/TAPT\checkpoint-5800
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-5800\TAPT_citation_intent_1\adapter_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-5800\TAPT_citation_intent_1\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-5800\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-5800\TAPT_citation_intent_1\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-5800\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-5800\TAPT_citation_intent_1\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-5800\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\ch

{'loss': 0.0731, 'learning_rate': 2.7044025157232706e-05, 'epoch': 54.72}


 73%|███████▎  | 5829/7950 [13:46<05:09,  6.85it/s]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16
                                                   
 73%|███████▎  | 5831/7950 [13:47<09:10,  3.85it/s]

{'eval_loss': 1.6874831914901733, 'eval_f1': 0.7062604062604062, 'eval_runtime': 0.5425, 'eval_samples_per_second': 210.141, 'eval_steps_per_second': 14.747, 'epoch': 55.0}


 74%|███████▎  | 5851/7950 [13:50<05:11,  6.75it/s]

{'loss': 0.0294, 'learning_rate': 2.641509433962264e-05, 'epoch': 55.19}


 74%|███████▍  | 5900/7950 [13:57<05:00,  6.82it/s]Saving model checkpoint to ./training_output/finetuning/TAPT\checkpoint-5900
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-5900\TAPT_citation_intent_1\adapter_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-5900\TAPT_citation_intent_1\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-5900\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-5900\TAPT_citation_intent_1\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-5900\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-5900\TAPT_citation_intent_1\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-5900\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\ch

{'loss': 0.049, 'learning_rate': 2.578616352201258e-05, 'epoch': 55.66}


 75%|███████▍  | 5935/7950 [14:02<04:54,  6.83it/s]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16
                                                   
 75%|███████▍  | 5937/7950 [14:03<08:44,  3.83it/s]

{'eval_loss': 1.6531164646148682, 'eval_f1': 0.7377747252747252, 'eval_runtime': 0.5475, 'eval_samples_per_second': 208.221, 'eval_steps_per_second': 14.612, 'epoch': 56.0}


 75%|███████▍  | 5951/7950 [14:05<04:59,  6.67it/s]

{'loss': 0.0846, 'learning_rate': 2.5157232704402517e-05, 'epoch': 56.13}


 75%|███████▌  | 6000/7950 [14:12<04:45,  6.83it/s]Saving model checkpoint to ./training_output/finetuning/TAPT\checkpoint-6000
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-6000\TAPT_citation_intent_1\adapter_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-6000\TAPT_citation_intent_1\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-6000\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-6000\TAPT_citation_intent_1\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-6000\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-6000\TAPT_citation_intent_1\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-6000\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\ch

{'loss': 0.0466, 'learning_rate': 2.4528301886792453e-05, 'epoch': 56.6}


 76%|███████▌  | 6041/7950 [14:18<04:40,  6.80it/s]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16
                                                   
 76%|███████▌  | 6043/7950 [14:19<08:18,  3.83it/s]

{'eval_loss': 1.6532164812088013, 'eval_f1': 0.6729639011372758, 'eval_runtime': 0.5455, 'eval_samples_per_second': 208.984, 'eval_steps_per_second': 14.666, 'epoch': 57.0}


 76%|███████▌  | 6051/7950 [14:20<04:57,  6.37it/s]

{'loss': 0.0614, 'learning_rate': 2.3899371069182393e-05, 'epoch': 57.08}


 77%|███████▋  | 6100/7950 [14:27<04:29,  6.85it/s]Saving model checkpoint to ./training_output/finetuning/TAPT\checkpoint-6100
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-6100\TAPT_citation_intent_1\adapter_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-6100\TAPT_citation_intent_1\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-6100\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-6100\TAPT_citation_intent_1\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-6100\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-6100\TAPT_citation_intent_1\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-6100\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\ch

{'loss': 0.0561, 'learning_rate': 2.327044025157233e-05, 'epoch': 57.55}


 77%|███████▋  | 6147/7950 [14:34<04:22,  6.86it/s]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16
                                                   
 77%|███████▋  | 6149/7950 [14:35<07:49,  3.84it/s]

{'eval_loss': 1.7336537837982178, 'eval_f1': 0.6380235839892268, 'eval_runtime': 0.5465, 'eval_samples_per_second': 208.602, 'eval_steps_per_second': 14.639, 'epoch': 58.0}


 77%|███████▋  | 6151/7950 [14:35<06:23,  4.70it/s]

{'loss': 0.0901, 'learning_rate': 2.2641509433962265e-05, 'epoch': 58.02}


 78%|███████▊  | 6200/7950 [14:42<04:15,  6.85it/s]Saving model checkpoint to ./training_output/finetuning/TAPT\checkpoint-6200
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-6200\TAPT_citation_intent_1\adapter_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-6200\TAPT_citation_intent_1\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-6200\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-6200\TAPT_citation_intent_1\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-6200\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-6200\TAPT_citation_intent_1\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-6200\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\ch

{'loss': 0.0371, 'learning_rate': 2.2012578616352204e-05, 'epoch': 58.49}


 79%|███████▊  | 6251/7950 [14:50<04:13,  6.71it/s]

{'loss': 0.089, 'learning_rate': 2.138364779874214e-05, 'epoch': 58.96}


 79%|███████▊  | 6253/7950 [14:50<04:10,  6.78it/s]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16
                                                   
 79%|███████▊  | 6255/7950 [14:51<07:22,  3.83it/s]

{'eval_loss': 1.7313801050186157, 'eval_f1': 0.6872937108231226, 'eval_runtime': 0.5435, 'eval_samples_per_second': 209.754, 'eval_steps_per_second': 14.72, 'epoch': 59.0}


 79%|███████▉  | 6300/7950 [14:58<04:01,  6.84it/s]Saving model checkpoint to ./training_output/finetuning/TAPT\checkpoint-6300
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-6300\TAPT_citation_intent_1\adapter_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-6300\TAPT_citation_intent_1\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-6300\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-6300\TAPT_citation_intent_1\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-6300\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-6300\TAPT_citation_intent_1\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-6300\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\ch

{'loss': 0.0379, 'learning_rate': 2.0754716981132076e-05, 'epoch': 59.43}


 80%|███████▉  | 6351/7950 [15:05<03:57,  6.73it/s]

{'loss': 0.0735, 'learning_rate': 2.0125786163522016e-05, 'epoch': 59.91}


 80%|███████▉  | 6359/7950 [15:06<03:52,  6.84it/s]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16
                                                   
 80%|████████  | 6361/7950 [15:07<06:53,  3.84it/s]

{'eval_loss': 1.876115322113037, 'eval_f1': 0.6934835045129163, 'eval_runtime': 0.5435, 'eval_samples_per_second': 209.754, 'eval_steps_per_second': 14.72, 'epoch': 60.0}


 81%|████████  | 6400/7950 [15:13<03:46,  6.84it/s]Saving model checkpoint to ./training_output/finetuning/TAPT\checkpoint-6400
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-6400\TAPT_citation_intent_1\adapter_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-6400\TAPT_citation_intent_1\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-6400\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-6400\TAPT_citation_intent_1\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-6400\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-6400\TAPT_citation_intent_1\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-6400\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\ch

{'loss': 0.0504, 'learning_rate': 1.9496855345911952e-05, 'epoch': 60.38}


 81%|████████  | 6451/7950 [15:21<03:42,  6.75it/s]

{'loss': 0.0727, 'learning_rate': 1.8867924528301888e-05, 'epoch': 60.85}


 81%|████████▏ | 6465/7950 [15:23<03:38,  6.81it/s]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16
                                                   
 81%|████████▏ | 6467/7950 [15:23<06:26,  3.83it/s]

{'eval_loss': 1.7971702814102173, 'eval_f1': 0.6825039325039325, 'eval_runtime': 0.5425, 'eval_samples_per_second': 210.141, 'eval_steps_per_second': 14.747, 'epoch': 61.0}


 82%|████████▏ | 6500/7950 [15:28<03:38,  6.65it/s]Saving model checkpoint to ./training_output/finetuning/TAPT\checkpoint-6500
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-6500\TAPT_citation_intent_1\adapter_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-6500\TAPT_citation_intent_1\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-6500\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-6500\TAPT_citation_intent_1\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-6500\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-6500\TAPT_citation_intent_1\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-6500\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\ch

{'loss': 0.0624, 'learning_rate': 1.8238993710691824e-05, 'epoch': 61.32}


 82%|████████▏ | 6551/7950 [15:36<03:28,  6.71it/s]

{'loss': 0.0546, 'learning_rate': 1.761006289308176e-05, 'epoch': 61.79}


 83%|████████▎ | 6571/7950 [15:39<03:25,  6.72it/s]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16
                                                   
 83%|████████▎ | 6573/7950 [15:40<06:05,  3.77it/s]

{'eval_loss': 1.8231459856033325, 'eval_f1': 0.7203144078144078, 'eval_runtime': 0.5565, 'eval_samples_per_second': 204.85, 'eval_steps_per_second': 14.375, 'epoch': 62.0}


 83%|████████▎ | 6600/7950 [15:44<03:17,  6.84it/s]Saving model checkpoint to ./training_output/finetuning/TAPT\checkpoint-6600
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-6600\TAPT_citation_intent_1\adapter_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-6600\TAPT_citation_intent_1\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-6600\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-6600\TAPT_citation_intent_1\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-6600\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-6600\TAPT_citation_intent_1\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-6600\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\ch

{'loss': 0.0659, 'learning_rate': 1.69811320754717e-05, 'epoch': 62.26}


 84%|████████▎ | 6651/7950 [15:51<03:12,  6.73it/s]

{'loss': 0.0499, 'learning_rate': 1.6352201257861635e-05, 'epoch': 62.74}


 84%|████████▍ | 6677/7950 [15:55<03:06,  6.83it/s]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16
                                                   
 84%|████████▍ | 6679/7950 [15:56<05:31,  3.83it/s]

{'eval_loss': 1.8380471467971802, 'eval_f1': 0.6934835045129163, 'eval_runtime': 0.5445, 'eval_samples_per_second': 209.369, 'eval_steps_per_second': 14.693, 'epoch': 63.0}


 84%|████████▍ | 6700/7950 [15:59<03:03,  6.81it/s]Saving model checkpoint to ./training_output/finetuning/TAPT\checkpoint-6700
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-6700\TAPT_citation_intent_1\adapter_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-6700\TAPT_citation_intent_1\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-6700\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-6700\TAPT_citation_intent_1\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-6700\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-6700\TAPT_citation_intent_1\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-6700\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\ch

{'loss': 0.0503, 'learning_rate': 1.572327044025157e-05, 'epoch': 63.21}


 85%|████████▍ | 6751/7950 [16:06<02:59,  6.69it/s]

{'loss': 0.0634, 'learning_rate': 1.509433962264151e-05, 'epoch': 63.68}


 85%|████████▌ | 6783/7950 [16:11<02:50,  6.84it/s]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16
                                                   
 85%|████████▌ | 6785/7950 [16:12<05:04,  3.83it/s]

{'eval_loss': 1.782190203666687, 'eval_f1': 0.6995189513669472, 'eval_runtime': 0.5485, 'eval_samples_per_second': 207.841, 'eval_steps_per_second': 14.585, 'epoch': 64.0}


 86%|████████▌ | 6800/7950 [16:14<02:48,  6.83it/s]Saving model checkpoint to ./training_output/finetuning/TAPT\checkpoint-6800
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-6800\TAPT_citation_intent_1\adapter_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-6800\TAPT_citation_intent_1\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-6800\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-6800\TAPT_citation_intent_1\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-6800\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-6800\TAPT_citation_intent_1\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-6800\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\ch

{'loss': 0.0372, 'learning_rate': 1.4465408805031447e-05, 'epoch': 64.15}


 86%|████████▌ | 6851/7950 [16:22<02:39,  6.90it/s]

{'loss': 0.0591, 'learning_rate': 1.3836477987421385e-05, 'epoch': 64.62}


 87%|████████▋ | 6889/7950 [16:27<02:32,  6.97it/s]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16
                                                   
 87%|████████▋ | 6891/7950 [16:28<04:29,  3.93it/s]

{'eval_loss': 1.7959996461868286, 'eval_f1': 0.6963534963534963, 'eval_runtime': 0.5305, 'eval_samples_per_second': 214.899, 'eval_steps_per_second': 15.081, 'epoch': 65.0}


 87%|████████▋ | 6900/7950 [16:29<02:36,  6.73it/s]Saving model checkpoint to ./training_output/finetuning/TAPT\checkpoint-6900
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-6900\TAPT_citation_intent_1\adapter_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-6900\TAPT_citation_intent_1\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-6900\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-6900\TAPT_citation_intent_1\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-6900\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-6900\TAPT_citation_intent_1\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-6900\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\ch

{'loss': 0.053, 'learning_rate': 1.320754716981132e-05, 'epoch': 65.09}


 87%|████████▋ | 6951/7950 [16:37<02:28,  6.74it/s]

{'loss': 0.0589, 'learning_rate': 1.2578616352201259e-05, 'epoch': 65.57}


 88%|████████▊ | 6995/7950 [16:43<02:19,  6.84it/s]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16
                                                   
 88%|████████▊ | 6997/7950 [16:44<04:08,  3.84it/s]

{'eval_loss': 1.9197551012039185, 'eval_f1': 0.6749391194341742, 'eval_runtime': 0.5445, 'eval_samples_per_second': 209.369, 'eval_steps_per_second': 14.693, 'epoch': 66.0}


 88%|████████▊ | 7000/7950 [16:44<03:03,  5.18it/s]Saving model checkpoint to ./training_output/finetuning/TAPT\checkpoint-7000
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-7000\TAPT_citation_intent_1\adapter_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-7000\TAPT_citation_intent_1\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-7000\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-7000\TAPT_citation_intent_1\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-7000\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-7000\TAPT_citation_intent_1\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-7000\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\ch

{'loss': 0.0568, 'learning_rate': 1.1949685534591196e-05, 'epoch': 66.04}


 89%|████████▊ | 7051/7950 [16:52<02:12,  6.76it/s]

{'loss': 0.048, 'learning_rate': 1.1320754716981132e-05, 'epoch': 66.51}


 89%|████████▉ | 7100/7950 [16:59<02:04,  6.84it/s]Saving model checkpoint to ./training_output/finetuning/TAPT\checkpoint-7100
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-7100\TAPT_citation_intent_1\adapter_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-7100\TAPT_citation_intent_1\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-7100\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-7100\TAPT_citation_intent_1\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-7100\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-7100\TAPT_citation_intent_1\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-7100\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\ch

{'loss': 0.0393, 'learning_rate': 1.069182389937107e-05, 'epoch': 66.98}


 89%|████████▉ | 7101/7950 [16:59<02:28,  5.73it/s]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16
                                                   
 89%|████████▉ | 7103/7950 [17:00<03:54,  3.61it/s]

{'eval_loss': 1.8840702772140503, 'eval_f1': 0.6961080586080586, 'eval_runtime': 0.5475, 'eval_samples_per_second': 208.22, 'eval_steps_per_second': 14.612, 'epoch': 67.0}


 90%|████████▉ | 7151/7950 [17:07<01:59,  6.69it/s]

{'loss': 0.0298, 'learning_rate': 1.0062893081761008e-05, 'epoch': 67.45}


 91%|█████████ | 7200/7950 [17:14<01:49,  6.83it/s]Saving model checkpoint to ./training_output/finetuning/TAPT\checkpoint-7200
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-7200\TAPT_citation_intent_1\adapter_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-7200\TAPT_citation_intent_1\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-7200\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-7200\TAPT_citation_intent_1\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-7200\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-7200\TAPT_citation_intent_1\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-7200\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\ch

{'loss': 0.0621, 'learning_rate': 9.433962264150944e-06, 'epoch': 67.92}


 91%|█████████ | 7207/7950 [17:15<01:53,  6.52it/s]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16
                                                   
 91%|█████████ | 7209/7950 [17:16<03:18,  3.73it/s]

{'eval_loss': 1.7616193294525146, 'eval_f1': 0.6762855794589541, 'eval_runtime': 0.5565, 'eval_samples_per_second': 204.85, 'eval_steps_per_second': 14.375, 'epoch': 68.0}


 91%|█████████ | 7251/7950 [17:22<01:43,  6.73it/s]

{'loss': 0.0312, 'learning_rate': 8.80503144654088e-06, 'epoch': 68.4}


 92%|█████████▏| 7300/7950 [17:30<01:34,  6.85it/s]Saving model checkpoint to ./training_output/finetuning/TAPT\checkpoint-7300
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-7300\TAPT_citation_intent_1\adapter_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-7300\TAPT_citation_intent_1\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-7300\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-7300\TAPT_citation_intent_1\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-7300\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-7300\TAPT_citation_intent_1\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-7300\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\ch

{'loss': 0.0857, 'learning_rate': 8.176100628930818e-06, 'epoch': 68.87}


 92%|█████████▏| 7313/7950 [17:32<01:33,  6.78it/s]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16
                                                   
 92%|█████████▏| 7315/7950 [17:33<02:51,  3.69it/s]

{'eval_loss': 1.7589722871780396, 'eval_f1': 0.705122853337139, 'eval_runtime': 0.5775, 'eval_samples_per_second': 197.394, 'eval_steps_per_second': 13.852, 'epoch': 69.0}


 92%|█████████▏| 7351/7950 [17:38<01:31,  6.58it/s]

{'loss': 0.0527, 'learning_rate': 7.547169811320755e-06, 'epoch': 69.34}


 93%|█████████▎| 7400/7950 [17:45<01:20,  6.84it/s]Saving model checkpoint to ./training_output/finetuning/TAPT\checkpoint-7400
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-7400\TAPT_citation_intent_1\adapter_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-7400\TAPT_citation_intent_1\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-7400\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-7400\TAPT_citation_intent_1\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-7400\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-7400\TAPT_citation_intent_1\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-7400\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\ch

{'loss': 0.0574, 'learning_rate': 6.918238993710692e-06, 'epoch': 69.81}


 93%|█████████▎| 7419/7950 [17:48<01:17,  6.81it/s]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16
                                                   
 93%|█████████▎| 7421/7950 [17:49<02:18,  3.83it/s]

{'eval_loss': 1.7806055545806885, 'eval_f1': 0.6829523689817808, 'eval_runtime': 0.5455, 'eval_samples_per_second': 208.985, 'eval_steps_per_second': 14.666, 'epoch': 70.0}


 94%|█████████▎| 7451/7950 [17:53<01:13,  6.77it/s]

{'loss': 0.0408, 'learning_rate': 6.289308176100629e-06, 'epoch': 70.28}


 94%|█████████▍| 7500/7950 [18:01<01:08,  6.53it/s]Saving model checkpoint to ./training_output/finetuning/TAPT\checkpoint-7500
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-7500\TAPT_citation_intent_1\adapter_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-7500\TAPT_citation_intent_1\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-7500\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-7500\TAPT_citation_intent_1\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-7500\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-7500\TAPT_citation_intent_1\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-7500\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\ch

{'loss': 0.0443, 'learning_rate': 5.660377358490566e-06, 'epoch': 70.75}


 95%|█████████▍| 7525/7950 [18:04<01:04,  6.62it/s]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16
                                                   
 95%|█████████▍| 7527/7950 [18:05<01:51,  3.79it/s]

{'eval_loss': 1.8257007598876953, 'eval_f1': 0.6906153891448009, 'eval_runtime': 0.5475, 'eval_samples_per_second': 208.22, 'eval_steps_per_second': 14.612, 'epoch': 71.0}


 95%|█████████▍| 7551/7950 [18:09<00:59,  6.75it/s]

{'loss': 0.0763, 'learning_rate': 5.031446540880504e-06, 'epoch': 71.23}


 96%|█████████▌| 7600/7950 [18:16<00:51,  6.85it/s]Saving model checkpoint to ./training_output/finetuning/TAPT\checkpoint-7600
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-7600\TAPT_citation_intent_1\adapter_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-7600\TAPT_citation_intent_1\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-7600\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-7600\TAPT_citation_intent_1\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-7600\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-7600\TAPT_citation_intent_1\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-7600\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\ch

{'loss': 0.0386, 'learning_rate': 4.40251572327044e-06, 'epoch': 71.7}


 96%|█████████▌| 7631/7950 [18:21<00:47,  6.68it/s]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16
                                                   
 96%|█████████▌| 7633/7950 [18:22<01:24,  3.76it/s]

{'eval_loss': 1.8420315980911255, 'eval_f1': 0.6961080586080586, 'eval_runtime': 0.5515, 'eval_samples_per_second': 206.709, 'eval_steps_per_second': 14.506, 'epoch': 72.0}


 96%|█████████▌| 7651/7950 [18:24<00:44,  6.74it/s]

{'loss': 0.0328, 'learning_rate': 3.7735849056603773e-06, 'epoch': 72.17}


 97%|█████████▋| 7700/7950 [18:32<00:36,  6.79it/s]Saving model checkpoint to ./training_output/finetuning/TAPT\checkpoint-7700
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-7700\TAPT_citation_intent_1\adapter_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-7700\TAPT_citation_intent_1\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-7700\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-7700\TAPT_citation_intent_1\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-7700\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-7700\TAPT_citation_intent_1\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-7700\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\ch

{'loss': 0.0435, 'learning_rate': 3.1446540880503146e-06, 'epoch': 72.64}


 97%|█████████▋| 7737/7950 [18:37<00:31,  6.83it/s]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16
                                                   
 97%|█████████▋| 7739/7950 [18:38<00:54,  3.85it/s]

{'eval_loss': 1.7152674198150635, 'eval_f1': 0.6858223608223608, 'eval_runtime': 0.5425, 'eval_samples_per_second': 210.141, 'eval_steps_per_second': 14.747, 'epoch': 73.0}


 97%|█████████▋| 7751/7950 [18:40<00:29,  6.68it/s]

{'loss': 0.0351, 'learning_rate': 2.515723270440252e-06, 'epoch': 73.11}


 98%|█████████▊| 7800/7950 [18:47<00:21,  6.83it/s]Saving model checkpoint to ./training_output/finetuning/TAPT\checkpoint-7800
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-7800\TAPT_citation_intent_1\adapter_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-7800\TAPT_citation_intent_1\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-7800\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-7800\TAPT_citation_intent_1\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-7800\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-7800\TAPT_citation_intent_1\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-7800\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\ch

{'loss': 0.0424, 'learning_rate': 1.8867924528301887e-06, 'epoch': 73.58}


 99%|█████████▊| 7843/7950 [18:53<00:15,  7.01it/s]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16
                                                   
 99%|█████████▊| 7845/7950 [18:54<00:26,  3.96it/s]

{'eval_loss': 1.745067834854126, 'eval_f1': 0.6781593406593407, 'eval_runtime': 0.5255, 'eval_samples_per_second': 216.946, 'eval_steps_per_second': 15.224, 'epoch': 74.0}


 99%|█████████▉| 7851/7950 [18:55<00:15,  6.21it/s]

{'loss': 0.0664, 'learning_rate': 1.257861635220126e-06, 'epoch': 74.06}


 99%|█████████▉| 7900/7950 [19:02<00:07,  6.88it/s]Saving model checkpoint to ./training_output/finetuning/TAPT\checkpoint-7900
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-7900\TAPT_citation_intent_1\adapter_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-7900\TAPT_citation_intent_1\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-7900\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-7900\TAPT_citation_intent_1\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-7900\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-7900\TAPT_citation_intent_1\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-7900\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\ch

{'loss': 0.0256, 'learning_rate': 6.28930817610063e-07, 'epoch': 74.53}


100%|██████████| 7950/7950 [19:09<00:00,  6.90it/s]***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 0.0435, 'learning_rate': 0.0, 'epoch': 75.0}


                                                   
100%|██████████| 7950/7950 [19:10<00:00,  6.90it/s]

Training completed. Do not forget to share your model on huggingface.co/models =)


100%|██████████| 7950/7950 [19:10<00:00,  6.91it/s]
***** Running Evaluation *****
  Num examples = 139
  Batch size = 16


{'eval_loss': 1.7449746131896973, 'eval_f1': 0.6781593406593407, 'eval_runtime': 0.5245, 'eval_samples_per_second': 217.36, 'eval_steps_per_second': 15.253, 'epoch': 75.0}
{'train_runtime': 1150.2429, 'train_samples_per_second': 110.064, 'train_steps_per_second': 6.912, 'train_loss': 0.22510552100415498, 'epoch': 75.0}


100%|██████████| 9/9 [00:00<00:00, 15.61it/s]
Configuration saved in ./training_output/finetuning/TAPT\TAPT_citation_intent_1\adapter_config.json
Module weights saved in ./training_output/finetuning/TAPT\TAPT_citation_intent_1\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/TAPT\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\TAPT_citation_intent_1\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\TAPT_citation_intent_1\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\TAPT_citation_intent_1\pytorch_model_head.bin


Only Finetuning

In [11]:
training_args = TrainingArguments(
    learning_rate=1e-4,
    num_train_epochs=50,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    logging_steps=50,
    output_dir="./training_output/finetuning/No_Pretrain",
    overwrite_output_dir=True,
    # The next line is important to ensure the dataset labels are properly passed to the model
    remove_unused_columns=True,
    evaluation_strategy = 'steps',
    # load_best_model_at_end = True,
    save_steps = 100,
    # lr_scheduler_type = 'constant',
)

In [14]:
finetuning_loop(num_models = 1, 
                 training_args = training_args, 
                 dataset = dataset_finetuning,  
                 adapter_name = dataset_name,
                 load_adapter = False,
                 num_labels = num_of_labels)

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaAdapterModel: ['lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.bias', 'lm_head.layer_norm.bias']
- This IS expected if you are initializing RobertaAdapterModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaAdapterModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaAdapterModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.embeddings.position_ids']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
You

{'loss': 1.4842, 'learning_rate': 9.905660377358492e-05, 'epoch': 0.47}


                                                 
  1%|          | 51/5300 [00:10<28:16,  3.09it/s]

{'eval_loss': 1.3228694200515747, 'eval_f1': 0.11368015414258188, 'eval_runtime': 0.5735, 'eval_samples_per_second': 198.773, 'eval_steps_per_second': 13.949, 'epoch': 0.47}


  2%|▏         | 100/5300 [00:17<12:28,  6.95it/s]The following columns in the evaluation set  don't have a corresponding argument in `RobertaAdapterModel.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 1.3254, 'learning_rate': 9.811320754716981e-05, 'epoch': 0.94}


                                                  
  2%|▏         | 100/5300 [00:18<12:28,  6.95it/s]Saving model checkpoint to ./training_output/finetuning/No_Pretrain\checkpoint-100
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-100\citation_intent_0\adapter_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-100\citation_intent_0\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-100\citation_intent_0\head_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-100\citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-100\citation_intent_0\head_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-100\citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-100\citation_intent_0\head_config.js

{'eval_loss': 1.3105509281158447, 'eval_f1': 0.11368015414258188, 'eval_runtime': 0.5605, 'eval_samples_per_second': 203.387, 'eval_steps_per_second': 14.273, 'epoch': 0.94}


  3%|▎         | 150/5300 [00:25<12:42,  6.75it/s]The following columns in the evaluation set  don't have a corresponding argument in `RobertaAdapterModel.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 1.2213, 'learning_rate': 9.716981132075472e-05, 'epoch': 1.42}


                                                  
  3%|▎         | 151/5300 [00:26<28:46,  2.98it/s]

{'eval_loss': 1.1323589086532593, 'eval_f1': 0.19568733153638815, 'eval_runtime': 0.5985, 'eval_samples_per_second': 190.463, 'eval_steps_per_second': 13.366, 'epoch': 1.42}


  4%|▍         | 200/5300 [00:32<11:05,  7.67it/s]The following columns in the evaluation set  don't have a corresponding argument in `RobertaAdapterModel.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 1.1233, 'learning_rate': 9.622641509433963e-05, 'epoch': 1.89}


                                                  
  4%|▍         | 200/5300 [00:33<11:05,  7.67it/s]Saving model checkpoint to ./training_output/finetuning/No_Pretrain\checkpoint-200
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-200\citation_intent_0\adapter_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-200\citation_intent_0\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-200\citation_intent_0\head_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-200\citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-200\citation_intent_0\head_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-200\citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-200\citation_intent_0\head_config.js

{'eval_loss': 0.9963754415512085, 'eval_f1': 0.3032828162354425, 'eval_runtime': 0.5255, 'eval_samples_per_second': 216.946, 'eval_steps_per_second': 15.224, 'epoch': 1.89}


  5%|▍         | 250/5300 [00:40<11:05,  7.58it/s]The following columns in the evaluation set  don't have a corresponding argument in `RobertaAdapterModel.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 1.0172, 'learning_rate': 9.528301886792453e-05, 'epoch': 2.36}


                                                  
  5%|▍         | 251/5300 [00:40<24:05,  3.49it/s]

{'eval_loss': 0.9688588380813599, 'eval_f1': 0.3280692640692641, 'eval_runtime': 0.4944, 'eval_samples_per_second': 230.56, 'eval_steps_per_second': 16.18, 'epoch': 2.36}


  6%|▌         | 300/5300 [00:47<10:55,  7.62it/s]The following columns in the evaluation set  don't have a corresponding argument in `RobertaAdapterModel.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 1.0164, 'learning_rate': 9.433962264150944e-05, 'epoch': 2.83}


                                                  
  6%|▌         | 300/5300 [00:47<10:55,  7.62it/s]Saving model checkpoint to ./training_output/finetuning/No_Pretrain\checkpoint-300
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-300\citation_intent_0\adapter_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-300\citation_intent_0\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-300\citation_intent_0\head_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-300\citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-300\citation_intent_0\head_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-300\citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-300\citation_intent_0\head_config.js

{'eval_loss': 0.9394617676734924, 'eval_f1': 0.32316091276854797, 'eval_runtime': 0.4864, 'eval_samples_per_second': 234.355, 'eval_steps_per_second': 16.446, 'epoch': 2.83}


  7%|▋         | 350/5300 [00:54<10:46,  7.66it/s]The following columns in the evaluation set  don't have a corresponding argument in `RobertaAdapterModel.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 0.9528, 'learning_rate': 9.339622641509434e-05, 'epoch': 3.3}


                                                  
  7%|▋         | 351/5300 [00:54<23:14,  3.55it/s]

{'eval_loss': 0.8289223313331604, 'eval_f1': 0.3819602088817919, 'eval_runtime': 0.4864, 'eval_samples_per_second': 234.355, 'eval_steps_per_second': 16.446, 'epoch': 3.3}


  8%|▊         | 400/5300 [01:01<10:41,  7.64it/s]The following columns in the evaluation set  don't have a corresponding argument in `RobertaAdapterModel.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 0.8867, 'learning_rate': 9.245283018867925e-05, 'epoch': 3.77}


                                                  
  8%|▊         | 400/5300 [01:01<10:41,  7.64it/s]Saving model checkpoint to ./training_output/finetuning/No_Pretrain\checkpoint-400
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-400\citation_intent_0\adapter_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-400\citation_intent_0\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-400\citation_intent_0\head_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-400\citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-400\citation_intent_0\head_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-400\citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-400\citation_intent_0\head_config.js

{'eval_loss': 0.8104235529899597, 'eval_f1': 0.41316875017479093, 'eval_runtime': 0.488, 'eval_samples_per_second': 233.607, 'eval_steps_per_second': 16.393, 'epoch': 3.77}


  8%|▊         | 450/5300 [01:08<10:32,  7.67it/s]The following columns in the evaluation set  don't have a corresponding argument in `RobertaAdapterModel.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 0.8543, 'learning_rate': 9.150943396226416e-05, 'epoch': 4.25}


                                                  
  9%|▊         | 451/5300 [01:09<22:44,  3.55it/s]

{'eval_loss': 0.7767825722694397, 'eval_f1': 0.46478428978428976, 'eval_runtime': 0.4844, 'eval_samples_per_second': 235.324, 'eval_steps_per_second': 16.514, 'epoch': 4.25}


  9%|▉         | 500/5300 [01:15<10:27,  7.66it/s]The following columns in the evaluation set  don't have a corresponding argument in `RobertaAdapterModel.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 0.8281, 'learning_rate': 9.056603773584906e-05, 'epoch': 4.72}


                                                  
  9%|▉         | 500/5300 [01:15<10:27,  7.66it/s]Saving model checkpoint to ./training_output/finetuning/No_Pretrain\checkpoint-500
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-500\citation_intent_0\adapter_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-500\citation_intent_0\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-500\citation_intent_0\head_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-500\citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-500\citation_intent_0\head_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-500\citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-500\citation_intent_0\head_config.js

{'eval_loss': 0.7363206148147583, 'eval_f1': 0.4794077134986226, 'eval_runtime': 0.5005, 'eval_samples_per_second': 227.793, 'eval_steps_per_second': 15.985, 'epoch': 4.72}


 10%|█         | 550/5300 [01:22<10:35,  7.47it/s]The following columns in the evaluation set  don't have a corresponding argument in `RobertaAdapterModel.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 0.701, 'learning_rate': 8.962264150943397e-05, 'epoch': 5.19}


                                                  
 10%|█         | 551/5300 [01:23<23:30,  3.37it/s]

{'eval_loss': 0.7232803106307983, 'eval_f1': 0.5599016790877257, 'eval_runtime': 0.5185, 'eval_samples_per_second': 219.878, 'eval_steps_per_second': 15.43, 'epoch': 5.19}


 11%|█▏        | 600/5300 [01:30<10:07,  7.74it/s]The following columns in the evaluation set  don't have a corresponding argument in `RobertaAdapterModel.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 0.765, 'learning_rate': 8.867924528301888e-05, 'epoch': 5.66}


                                                  
 11%|█▏        | 600/5300 [01:30<10:07,  7.74it/s]Saving model checkpoint to ./training_output/finetuning/No_Pretrain\checkpoint-600
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-600\citation_intent_0\adapter_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-600\citation_intent_0\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-600\citation_intent_0\head_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-600\citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-600\citation_intent_0\head_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-600\citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-600\citation_intent_0\head_config.js

{'eval_loss': 0.7354933023452759, 'eval_f1': 0.5944827571549905, 'eval_runtime': 0.4844, 'eval_samples_per_second': 235.324, 'eval_steps_per_second': 16.514, 'epoch': 5.66}


 12%|█▏        | 650/5300 [01:37<10:28,  7.40it/s]The following columns in the evaluation set  don't have a corresponding argument in `RobertaAdapterModel.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 0.6861, 'learning_rate': 8.773584905660378e-05, 'epoch': 6.13}


                                                  
 12%|█▏        | 651/5300 [01:37<23:17,  3.33it/s]

{'eval_loss': 0.7398152947425842, 'eval_f1': 0.5496797294416342, 'eval_runtime': 0.5255, 'eval_samples_per_second': 216.946, 'eval_steps_per_second': 15.224, 'epoch': 6.13}


 13%|█▎        | 700/5300 [01:44<10:40,  7.19it/s]The following columns in the evaluation set  don't have a corresponding argument in `RobertaAdapterModel.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 0.6494, 'learning_rate': 8.679245283018869e-05, 'epoch': 6.6}


                                                  
 13%|█▎        | 700/5300 [01:44<10:40,  7.19it/s]Saving model checkpoint to ./training_output/finetuning/No_Pretrain\checkpoint-700
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-700\citation_intent_0\adapter_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-700\citation_intent_0\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-700\citation_intent_0\head_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-700\citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-700\citation_intent_0\head_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-700\citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-700\citation_intent_0\head_config.js

{'eval_loss': 0.7264271974563599, 'eval_f1': 0.5731379074927462, 'eval_runtime': 0.5165, 'eval_samples_per_second': 220.73, 'eval_steps_per_second': 15.49, 'epoch': 6.6}


 14%|█▍        | 750/5300 [01:51<10:42,  7.08it/s]The following columns in the evaluation set  don't have a corresponding argument in `RobertaAdapterModel.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 0.6433, 'learning_rate': 8.584905660377359e-05, 'epoch': 7.08}


                                                  
 14%|█▍        | 751/5300 [01:52<23:07,  3.28it/s]

{'eval_loss': 0.7228587865829468, 'eval_f1': 0.5759570494864612, 'eval_runtime': 0.5315, 'eval_samples_per_second': 214.495, 'eval_steps_per_second': 15.052, 'epoch': 7.08}


 15%|█▌        | 800/5300 [01:59<09:52,  7.60it/s]The following columns in the evaluation set  don't have a corresponding argument in `RobertaAdapterModel.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 0.5797, 'learning_rate': 8.49056603773585e-05, 'epoch': 7.55}


                                                  
 15%|█▌        | 800/5300 [01:59<09:52,  7.60it/s]Saving model checkpoint to ./training_output/finetuning/No_Pretrain\checkpoint-800
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-800\citation_intent_0\adapter_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-800\citation_intent_0\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-800\citation_intent_0\head_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-800\citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-800\citation_intent_0\head_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-800\citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-800\citation_intent_0\head_config.js

{'eval_loss': 0.8190287351608276, 'eval_f1': 0.520638312304979, 'eval_runtime': 0.4924, 'eval_samples_per_second': 231.497, 'eval_steps_per_second': 16.245, 'epoch': 7.55}


 16%|█▌        | 850/5300 [02:06<09:03,  8.19it/s]The following columns in the evaluation set  don't have a corresponding argument in `RobertaAdapterModel.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 0.5983, 'learning_rate': 8.396226415094341e-05, 'epoch': 8.02}


                                                  
 16%|█▌        | 851/5300 [02:07<19:28,  3.81it/s]

{'eval_loss': 0.7162811756134033, 'eval_f1': 0.5441565515094927, 'eval_runtime': 0.4995, 'eval_samples_per_second': 228.25, 'eval_steps_per_second': 16.018, 'epoch': 8.02}


 17%|█▋        | 900/5300 [02:13<09:55,  7.39it/s]The following columns in the evaluation set  don't have a corresponding argument in `RobertaAdapterModel.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 0.5382, 'learning_rate': 8.30188679245283e-05, 'epoch': 8.49}


                                                  
 17%|█▋        | 900/5300 [02:14<09:55,  7.39it/s]Saving model checkpoint to ./training_output/finetuning/No_Pretrain\checkpoint-900
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-900\citation_intent_0\adapter_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-900\citation_intent_0\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-900\citation_intent_0\head_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-900\citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-900\citation_intent_0\head_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-900\citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-900\citation_intent_0\head_config.js

{'eval_loss': 0.7161386013031006, 'eval_f1': 0.578446335799277, 'eval_runtime': 0.4934, 'eval_samples_per_second': 231.028, 'eval_steps_per_second': 16.212, 'epoch': 8.49}


 18%|█▊        | 950/5300 [02:21<09:50,  7.37it/s]The following columns in the evaluation set  don't have a corresponding argument in `RobertaAdapterModel.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 0.5683, 'learning_rate': 8.207547169811322e-05, 'epoch': 8.96}


                                                  
 18%|█▊        | 951/5300 [02:21<21:31,  3.37it/s]

{'eval_loss': 0.7548505663871765, 'eval_f1': 0.5699344515133989, 'eval_runtime': 0.5205, 'eval_samples_per_second': 219.032, 'eval_steps_per_second': 15.371, 'epoch': 8.96}


 19%|█▉        | 1000/5300 [02:28<09:28,  7.56it/s]The following columns in the evaluation set  don't have a corresponding argument in `RobertaAdapterModel.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 0.4763, 'learning_rate': 8.113207547169813e-05, 'epoch': 9.43}


                                                   
 19%|█▉        | 1000/5300 [02:29<09:28,  7.56it/s]Saving model checkpoint to ./training_output/finetuning/No_Pretrain\checkpoint-1000
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-1000\citation_intent_0\adapter_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-1000\citation_intent_0\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-1000\citation_intent_0\head_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-1000\citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-1000\citation_intent_0\head_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-1000\citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-1000\citation_intent_0\head

{'eval_loss': 0.7043119072914124, 'eval_f1': 0.5722878390201225, 'eval_runtime': 0.4874, 'eval_samples_per_second': 233.874, 'eval_steps_per_second': 16.412, 'epoch': 9.43}


 20%|█▉        | 1050/5300 [02:35<09:45,  7.26it/s]The following columns in the evaluation set  don't have a corresponding argument in `RobertaAdapterModel.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 0.4847, 'learning_rate': 8.018867924528302e-05, 'epoch': 9.91}


                                                   
 20%|█▉        | 1051/5300 [02:36<21:27,  3.30it/s]

{'eval_loss': 0.7743850350379944, 'eval_f1': 0.5874509959737232, 'eval_runtime': 0.5265, 'eval_samples_per_second': 216.534, 'eval_steps_per_second': 15.195, 'epoch': 9.91}


 21%|██        | 1100/5300 [02:43<09:44,  7.19it/s]The following columns in the evaluation set  don't have a corresponding argument in `RobertaAdapterModel.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 0.4981, 'learning_rate': 7.924528301886794e-05, 'epoch': 10.38}


                                                   
 21%|██        | 1100/5300 [02:43<09:44,  7.19it/s]Saving model checkpoint to ./training_output/finetuning/No_Pretrain\checkpoint-1100
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-1100\citation_intent_0\adapter_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-1100\citation_intent_0\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-1100\citation_intent_0\head_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-1100\citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-1100\citation_intent_0\head_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-1100\citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-1100\citation_intent_0\head

{'eval_loss': 0.7754744291305542, 'eval_f1': 0.5648174924490714, 'eval_runtime': 0.5275, 'eval_samples_per_second': 216.123, 'eval_steps_per_second': 15.167, 'epoch': 10.38}


 22%|██▏       | 1150/5300 [02:50<09:37,  7.18it/s]The following columns in the evaluation set  don't have a corresponding argument in `RobertaAdapterModel.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 0.438, 'learning_rate': 7.830188679245283e-05, 'epoch': 10.85}


                                                   
 22%|██▏       | 1151/5300 [02:51<21:23,  3.23it/s]

{'eval_loss': 0.677969753742218, 'eval_f1': 0.6193116464855595, 'eval_runtime': 0.5465, 'eval_samples_per_second': 208.602, 'eval_steps_per_second': 14.639, 'epoch': 10.85}


 23%|██▎       | 1200/5300 [02:58<08:56,  7.64it/s]The following columns in the evaluation set  don't have a corresponding argument in `RobertaAdapterModel.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 0.4093, 'learning_rate': 7.735849056603774e-05, 'epoch': 11.32}


                                                   
 23%|██▎       | 1200/5300 [02:58<08:56,  7.64it/s]Saving model checkpoint to ./training_output/finetuning/No_Pretrain\checkpoint-1200
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-1200\citation_intent_0\adapter_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-1200\citation_intent_0\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-1200\citation_intent_0\head_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-1200\citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-1200\citation_intent_0\head_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-1200\citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-1200\citation_intent_0\head

{'eval_loss': 0.7666207551956177, 'eval_f1': 0.5808139534883722, 'eval_runtime': 0.5295, 'eval_samples_per_second': 215.305, 'eval_steps_per_second': 15.109, 'epoch': 11.32}


 24%|██▎       | 1250/5300 [03:05<08:49,  7.64it/s]The following columns in the evaluation set  don't have a corresponding argument in `RobertaAdapterModel.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 0.4311, 'learning_rate': 7.641509433962265e-05, 'epoch': 11.79}


                                                   
 24%|██▎       | 1251/5300 [03:06<19:21,  3.49it/s]

{'eval_loss': 0.7610368728637695, 'eval_f1': 0.5842957489509214, 'eval_runtime': 0.4914, 'eval_samples_per_second': 231.969, 'eval_steps_per_second': 16.278, 'epoch': 11.79}


 25%|██▍       | 1300/5300 [03:12<08:35,  7.77it/s]The following columns in the evaluation set  don't have a corresponding argument in `RobertaAdapterModel.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 0.3815, 'learning_rate': 7.547169811320755e-05, 'epoch': 12.26}


                                                   
 25%|██▍       | 1300/5300 [03:13<08:35,  7.77it/s]Saving model checkpoint to ./training_output/finetuning/No_Pretrain\checkpoint-1300
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-1300\citation_intent_0\adapter_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-1300\citation_intent_0\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-1300\citation_intent_0\head_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-1300\citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-1300\citation_intent_0\head_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-1300\citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-1300\citation_intent_0\head

{'eval_loss': 0.7546194791793823, 'eval_f1': 0.5383115703384992, 'eval_runtime': 0.4784, 'eval_samples_per_second': 238.278, 'eval_steps_per_second': 16.721, 'epoch': 12.26}


 25%|██▌       | 1350/5300 [03:20<08:56,  7.36it/s]The following columns in the evaluation set  don't have a corresponding argument in `RobertaAdapterModel.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 0.4044, 'learning_rate': 7.452830188679245e-05, 'epoch': 12.74}


                                                   
 25%|██▌       | 1351/5300 [03:20<20:20,  3.24it/s]

{'eval_loss': 0.7254762649536133, 'eval_f1': 0.6259359288915939, 'eval_runtime': 0.5355, 'eval_samples_per_second': 212.891, 'eval_steps_per_second': 14.94, 'epoch': 12.74}


 26%|██▋       | 1400/5300 [03:27<09:10,  7.09it/s]The following columns in the evaluation set  don't have a corresponding argument in `RobertaAdapterModel.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 0.3183, 'learning_rate': 7.358490566037736e-05, 'epoch': 13.21}


                                                   
 26%|██▋       | 1400/5300 [03:28<09:10,  7.09it/s]Saving model checkpoint to ./training_output/finetuning/No_Pretrain\checkpoint-1400
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-1400\citation_intent_0\adapter_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-1400\citation_intent_0\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-1400\citation_intent_0\head_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-1400\citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-1400\citation_intent_0\head_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-1400\citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-1400\citation_intent_0\head

{'eval_loss': 0.7877862453460693, 'eval_f1': 0.5759002392390352, 'eval_runtime': 0.5285, 'eval_samples_per_second': 215.713, 'eval_steps_per_second': 15.138, 'epoch': 13.21}


 27%|██▋       | 1450/5300 [03:34<09:19,  6.88it/s]The following columns in the evaluation set  don't have a corresponding argument in `RobertaAdapterModel.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 0.366, 'learning_rate': 7.264150943396226e-05, 'epoch': 13.68}


                                                   
 27%|██▋       | 1451/5300 [03:35<19:30,  3.29it/s]

{'eval_loss': 0.7384224534034729, 'eval_f1': 0.631448205105494, 'eval_runtime': 0.5255, 'eval_samples_per_second': 216.946, 'eval_steps_per_second': 15.224, 'epoch': 13.68}


 28%|██▊       | 1500/5300 [03:41<08:09,  7.77it/s]The following columns in the evaluation set  don't have a corresponding argument in `RobertaAdapterModel.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 0.3898, 'learning_rate': 7.169811320754717e-05, 'epoch': 14.15}


                                                   
 28%|██▊       | 1500/5300 [03:42<08:09,  7.77it/s]Saving model checkpoint to ./training_output/finetuning/No_Pretrain\checkpoint-1500
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-1500\citation_intent_0\adapter_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-1500\citation_intent_0\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-1500\citation_intent_0\head_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-1500\citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-1500\citation_intent_0\head_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-1500\citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-1500\citation_intent_0\head

{'eval_loss': 0.6935425996780396, 'eval_f1': 0.6167579548595592, 'eval_runtime': 0.4764, 'eval_samples_per_second': 239.279, 'eval_steps_per_second': 16.791, 'epoch': 14.15}


 29%|██▉       | 1550/5300 [03:48<08:12,  7.62it/s]The following columns in the evaluation set  don't have a corresponding argument in `RobertaAdapterModel.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 0.2721, 'learning_rate': 7.075471698113208e-05, 'epoch': 14.62}


                                                   
 29%|██▉       | 1551/5300 [03:49<17:27,  3.58it/s]

{'eval_loss': 0.7419702410697937, 'eval_f1': 0.6140111540111539, 'eval_runtime': 0.4774, 'eval_samples_per_second': 238.777, 'eval_steps_per_second': 16.756, 'epoch': 14.62}


 30%|███       | 1600/5300 [03:55<07:52,  7.84it/s]The following columns in the evaluation set  don't have a corresponding argument in `RobertaAdapterModel.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 0.3712, 'learning_rate': 6.981132075471698e-05, 'epoch': 15.09}


                                                   
 30%|███       | 1600/5300 [03:56<07:52,  7.84it/s]Saving model checkpoint to ./training_output/finetuning/No_Pretrain\checkpoint-1600
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-1600\citation_intent_0\adapter_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-1600\citation_intent_0\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-1600\citation_intent_0\head_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-1600\citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-1600\citation_intent_0\head_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-1600\citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-1600\citation_intent_0\head

{'eval_loss': 0.7454748749732971, 'eval_f1': 0.6353513907773543, 'eval_runtime': 0.4774, 'eval_samples_per_second': 238.777, 'eval_steps_per_second': 16.756, 'epoch': 15.09}


 31%|███       | 1650/5300 [04:03<07:53,  7.70it/s]The following columns in the evaluation set  don't have a corresponding argument in `RobertaAdapterModel.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 0.2895, 'learning_rate': 6.886792452830189e-05, 'epoch': 15.57}


                                                   
 31%|███       | 1651/5300 [04:03<17:08,  3.55it/s]

{'eval_loss': 0.71061110496521, 'eval_f1': 0.6775974025974025, 'eval_runtime': 0.4874, 'eval_samples_per_second': 233.874, 'eval_steps_per_second': 16.412, 'epoch': 15.57}


 32%|███▏      | 1700/5300 [04:10<07:39,  7.83it/s]The following columns in the evaluation set  don't have a corresponding argument in `RobertaAdapterModel.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 0.3369, 'learning_rate': 6.79245283018868e-05, 'epoch': 16.04}


                                                   
 32%|███▏      | 1700/5300 [04:10<07:39,  7.83it/s]Saving model checkpoint to ./training_output/finetuning/No_Pretrain\checkpoint-1700
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-1700\citation_intent_0\adapter_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-1700\citation_intent_0\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-1700\citation_intent_0\head_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-1700\citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-1700\citation_intent_0\head_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-1700\citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-1700\citation_intent_0\head

{'eval_loss': 0.9041547179222107, 'eval_f1': 0.5093926379104355, 'eval_runtime': 0.4864, 'eval_samples_per_second': 234.355, 'eval_steps_per_second': 16.446, 'epoch': 16.04}


 33%|███▎      | 1750/5300 [04:17<07:46,  7.61it/s]The following columns in the evaluation set  don't have a corresponding argument in `RobertaAdapterModel.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 0.2491, 'learning_rate': 6.69811320754717e-05, 'epoch': 16.51}


                                                   
 33%|███▎      | 1751/5300 [04:18<16:46,  3.53it/s]

{'eval_loss': 0.7435706853866577, 'eval_f1': 0.6622250057032666, 'eval_runtime': 0.4874, 'eval_samples_per_second': 233.874, 'eval_steps_per_second': 16.412, 'epoch': 16.51}


 34%|███▍      | 1800/5300 [04:24<07:34,  7.71it/s]The following columns in the evaluation set  don't have a corresponding argument in `RobertaAdapterModel.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 0.2986, 'learning_rate': 6.60377358490566e-05, 'epoch': 16.98}


                                                   
 34%|███▍      | 1800/5300 [04:25<07:34,  7.71it/s]Saving model checkpoint to ./training_output/finetuning/No_Pretrain\checkpoint-1800
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-1800\citation_intent_0\adapter_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-1800\citation_intent_0\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-1800\citation_intent_0\head_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-1800\citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-1800\citation_intent_0\head_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-1800\citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-1800\citation_intent_0\head

{'eval_loss': 0.7784269452095032, 'eval_f1': 0.6353700417365014, 'eval_runtime': 0.4784, 'eval_samples_per_second': 238.277, 'eval_steps_per_second': 16.721, 'epoch': 16.98}


 35%|███▍      | 1850/5300 [04:31<07:21,  7.82it/s]The following columns in the evaluation set  don't have a corresponding argument in `RobertaAdapterModel.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 0.2793, 'learning_rate': 6.50943396226415e-05, 'epoch': 17.45}


                                                   
 35%|███▍      | 1851/5300 [04:32<15:53,  3.62it/s]

{'eval_loss': 0.761978268623352, 'eval_f1': 0.6343967560499001, 'eval_runtime': 0.4774, 'eval_samples_per_second': 238.777, 'eval_steps_per_second': 16.756, 'epoch': 17.45}


 36%|███▌      | 1900/5300 [04:38<07:17,  7.78it/s]The following columns in the evaluation set  don't have a corresponding argument in `RobertaAdapterModel.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 0.2463, 'learning_rate': 6.415094339622641e-05, 'epoch': 17.92}


                                                   
 36%|███▌      | 1900/5300 [04:39<07:17,  7.78it/s]Saving model checkpoint to ./training_output/finetuning/No_Pretrain\checkpoint-1900
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-1900\citation_intent_0\adapter_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-1900\citation_intent_0\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-1900\citation_intent_0\head_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-1900\citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-1900\citation_intent_0\head_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-1900\citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-1900\citation_intent_0\head

{'eval_loss': 0.9387152791023254, 'eval_f1': 0.6528865692814967, 'eval_runtime': 0.4774, 'eval_samples_per_second': 238.777, 'eval_steps_per_second': 16.756, 'epoch': 17.92}


 37%|███▋      | 1950/5300 [04:45<08:01,  6.96it/s]The following columns in the evaluation set  don't have a corresponding argument in `RobertaAdapterModel.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 0.2124, 'learning_rate': 6.320754716981132e-05, 'epoch': 18.4}


                                                   
 37%|███▋      | 1951/5300 [04:46<16:25,  3.40it/s]

{'eval_loss': 0.8154286742210388, 'eval_f1': 0.6258928571428571, 'eval_runtime': 0.4924, 'eval_samples_per_second': 231.497, 'eval_steps_per_second': 16.245, 'epoch': 18.4}


 38%|███▊      | 2000/5300 [04:52<07:12,  7.63it/s]The following columns in the evaluation set  don't have a corresponding argument in `RobertaAdapterModel.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 0.2523, 'learning_rate': 6.226415094339622e-05, 'epoch': 18.87}


                                                   
 38%|███▊      | 2000/5300 [04:53<07:12,  7.63it/s]Saving model checkpoint to ./training_output/finetuning/No_Pretrain\checkpoint-2000
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-2000\citation_intent_0\adapter_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-2000\citation_intent_0\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-2000\citation_intent_0\head_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-2000\citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-2000\citation_intent_0\head_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-2000\citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-2000\citation_intent_0\head

{'eval_loss': 0.9015530943870544, 'eval_f1': 0.6308542356336474, 'eval_runtime': 0.5165, 'eval_samples_per_second': 220.73, 'eval_steps_per_second': 15.49, 'epoch': 18.87}


 39%|███▊      | 2050/5300 [05:00<07:10,  7.55it/s]The following columns in the evaluation set  don't have a corresponding argument in `RobertaAdapterModel.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 0.1957, 'learning_rate': 6.132075471698113e-05, 'epoch': 19.34}


                                                   
 39%|███▊      | 2051/5300 [05:00<15:22,  3.52it/s]

{'eval_loss': 0.8501983284950256, 'eval_f1': 0.6429345332947028, 'eval_runtime': 0.4884, 'eval_samples_per_second': 233.395, 'eval_steps_per_second': 16.379, 'epoch': 19.34}


 40%|███▉      | 2100/5300 [05:07<06:54,  7.72it/s]The following columns in the evaluation set  don't have a corresponding argument in `RobertaAdapterModel.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 0.2331, 'learning_rate': 6.037735849056604e-05, 'epoch': 19.81}


                                                   
 40%|███▉      | 2100/5300 [05:07<06:54,  7.72it/s]Saving model checkpoint to ./training_output/finetuning/No_Pretrain\checkpoint-2100
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-2100\citation_intent_0\adapter_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-2100\citation_intent_0\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-2100\citation_intent_0\head_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-2100\citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-2100\citation_intent_0\head_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-2100\citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-2100\citation_intent_0\head

{'eval_loss': 0.9000172019004822, 'eval_f1': 0.6384318134033274, 'eval_runtime': 0.4794, 'eval_samples_per_second': 237.78, 'eval_steps_per_second': 16.686, 'epoch': 19.81}


 41%|████      | 2150/5300 [05:14<06:49,  7.69it/s]The following columns in the evaluation set  don't have a corresponding argument in `RobertaAdapterModel.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 0.2687, 'learning_rate': 5.943396226415094e-05, 'epoch': 20.28}


                                                   
 41%|████      | 2151/5300 [05:14<14:47,  3.55it/s]

{'eval_loss': 0.9493523836135864, 'eval_f1': 0.630466572836031, 'eval_runtime': 0.4854, 'eval_samples_per_second': 234.839, 'eval_steps_per_second': 16.48, 'epoch': 20.28}


 42%|████▏     | 2200/5300 [05:21<07:08,  7.23it/s]The following columns in the evaluation set  don't have a corresponding argument in `RobertaAdapterModel.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 0.2121, 'learning_rate': 5.849056603773585e-05, 'epoch': 20.75}


                                                   
 42%|████▏     | 2200/5300 [05:21<07:08,  7.23it/s]Saving model checkpoint to ./training_output/finetuning/No_Pretrain\checkpoint-2200
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-2200\citation_intent_0\adapter_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-2200\citation_intent_0\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-2200\citation_intent_0\head_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-2200\citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-2200\citation_intent_0\head_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-2200\citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-2200\citation_intent_0\head

{'eval_loss': 0.8624254465103149, 'eval_f1': 0.6181715459044975, 'eval_runtime': 0.5465, 'eval_samples_per_second': 208.602, 'eval_steps_per_second': 14.639, 'epoch': 20.75}


 42%|████▏     | 2250/5300 [05:28<06:38,  7.66it/s]The following columns in the evaluation set  don't have a corresponding argument in `RobertaAdapterModel.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 0.1718, 'learning_rate': 5.7547169811320756e-05, 'epoch': 21.23}


                                                   
 42%|████▏     | 2251/5300 [05:29<14:30,  3.50it/s]

{'eval_loss': 0.8558226227760315, 'eval_f1': 0.6178683957285652, 'eval_runtime': 0.4965, 'eval_samples_per_second': 229.63, 'eval_steps_per_second': 16.114, 'epoch': 21.23}


 43%|████▎     | 2300/5300 [05:35<06:43,  7.44it/s]The following columns in the evaluation set  don't have a corresponding argument in `RobertaAdapterModel.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 0.174, 'learning_rate': 5.660377358490566e-05, 'epoch': 21.7}


                                                   
 43%|████▎     | 2300/5300 [05:36<06:43,  7.44it/s]Saving model checkpoint to ./training_output/finetuning/No_Pretrain\checkpoint-2300
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-2300\citation_intent_0\adapter_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-2300\citation_intent_0\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-2300\citation_intent_0\head_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-2300\citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-2300\citation_intent_0\head_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-2300\citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-2300\citation_intent_0\head

{'eval_loss': 0.880394458770752, 'eval_f1': 0.6548163416669911, 'eval_runtime': 0.5485, 'eval_samples_per_second': 207.841, 'eval_steps_per_second': 14.585, 'epoch': 21.7}


 44%|████▍     | 2350/5300 [05:43<06:27,  7.61it/s]The following columns in the evaluation set  don't have a corresponding argument in `RobertaAdapterModel.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 0.1583, 'learning_rate': 5.5660377358490564e-05, 'epoch': 22.17}


                                                   
 44%|████▍     | 2351/5300 [05:43<14:06,  3.48it/s]

{'eval_loss': 0.960555374622345, 'eval_f1': 0.6420698203147529, 'eval_runtime': 0.4894, 'eval_samples_per_second': 232.917, 'eval_steps_per_second': 16.345, 'epoch': 22.17}


 45%|████▌     | 2400/5300 [05:50<06:32,  7.40it/s]The following columns in the evaluation set  don't have a corresponding argument in `RobertaAdapterModel.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 0.156, 'learning_rate': 5.4716981132075475e-05, 'epoch': 22.64}


                                                   
 45%|████▌     | 2400/5300 [05:50<06:32,  7.40it/s]Saving model checkpoint to ./training_output/finetuning/No_Pretrain\checkpoint-2400
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-2400\citation_intent_0\adapter_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-2400\citation_intent_0\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-2400\citation_intent_0\head_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-2400\citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-2400\citation_intent_0\head_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-2400\citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-2400\citation_intent_0\head

{'eval_loss': 1.0586646795272827, 'eval_f1': 0.6474063148517092, 'eval_runtime': 0.4874, 'eval_samples_per_second': 233.874, 'eval_steps_per_second': 16.412, 'epoch': 22.64}


 46%|████▌     | 2450/5300 [05:57<06:04,  7.81it/s]The following columns in the evaluation set  don't have a corresponding argument in `RobertaAdapterModel.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 0.1608, 'learning_rate': 5.377358490566038e-05, 'epoch': 23.11}


                                                   
 46%|████▌     | 2451/5300 [05:58<13:43,  3.46it/s]

{'eval_loss': 0.9470317959785461, 'eval_f1': 0.6680719656283566, 'eval_runtime': 0.5175, 'eval_samples_per_second': 220.303, 'eval_steps_per_second': 15.46, 'epoch': 23.11}


 47%|████▋     | 2500/5300 [06:04<06:20,  7.37it/s]The following columns in the evaluation set  don't have a corresponding argument in `RobertaAdapterModel.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 0.1526, 'learning_rate': 5.283018867924528e-05, 'epoch': 23.58}


                                                   
 47%|████▋     | 2500/5300 [06:04<06:20,  7.37it/s]Saving model checkpoint to ./training_output/finetuning/No_Pretrain\checkpoint-2500
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-2500\citation_intent_0\adapter_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-2500\citation_intent_0\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-2500\citation_intent_0\head_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-2500\citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-2500\citation_intent_0\head_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-2500\citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-2500\citation_intent_0\head

{'eval_loss': 1.0986543893814087, 'eval_f1': 0.6273504273504273, 'eval_runtime': 0.5165, 'eval_samples_per_second': 220.73, 'eval_steps_per_second': 15.49, 'epoch': 23.58}


 48%|████▊     | 2550/5300 [06:11<05:54,  7.75it/s]The following columns in the evaluation set  don't have a corresponding argument in `RobertaAdapterModel.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 0.1592, 'learning_rate': 5.188679245283019e-05, 'epoch': 24.06}


                                                   
 48%|████▊     | 2551/5300 [06:12<13:19,  3.44it/s]

{'eval_loss': 1.0670936107635498, 'eval_f1': 0.6311813186813187, 'eval_runtime': 0.5125, 'eval_samples_per_second': 222.454, 'eval_steps_per_second': 15.611, 'epoch': 24.06}


 49%|████▉     | 2600/5300 [06:19<06:14,  7.21it/s]The following columns in the evaluation set  don't have a corresponding argument in `RobertaAdapterModel.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 0.1225, 'learning_rate': 5.09433962264151e-05, 'epoch': 24.53}


                                                   
 49%|████▉     | 2600/5300 [06:19<06:14,  7.21it/s]Saving model checkpoint to ./training_output/finetuning/No_Pretrain\checkpoint-2600
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-2600\citation_intent_0\adapter_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-2600\citation_intent_0\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-2600\citation_intent_0\head_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-2600\citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-2600\citation_intent_0\head_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-2600\citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-2600\citation_intent_0\head

{'eval_loss': 1.0613209009170532, 'eval_f1': 0.6680205415499533, 'eval_runtime': 0.5055, 'eval_samples_per_second': 225.538, 'eval_steps_per_second': 15.827, 'epoch': 24.53}


 50%|█████     | 2650/5300 [06:26<06:12,  7.11it/s]The following columns in the evaluation set  don't have a corresponding argument in `RobertaAdapterModel.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 0.1505, 'learning_rate': 5e-05, 'epoch': 25.0}


                                                   
 50%|█████     | 2651/5300 [06:26<10:49,  4.08it/s]

{'eval_loss': 1.0244582891464233, 'eval_f1': 0.6383958633958634, 'eval_runtime': 0.5095, 'eval_samples_per_second': 223.766, 'eval_steps_per_second': 15.703, 'epoch': 25.0}


 51%|█████     | 2700/5300 [06:33<05:40,  7.64it/s]The following columns in the evaluation set  don't have a corresponding argument in `RobertaAdapterModel.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 0.1114, 'learning_rate': 4.9056603773584906e-05, 'epoch': 25.47}


                                                   
 51%|█████     | 2700/5300 [06:34<05:40,  7.64it/s]Saving model checkpoint to ./training_output/finetuning/No_Pretrain\checkpoint-2700
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-2700\citation_intent_0\adapter_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-2700\citation_intent_0\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-2700\citation_intent_0\head_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-2700\citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-2700\citation_intent_0\head_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-2700\citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-2700\citation_intent_0\head

{'eval_loss': 1.0058002471923828, 'eval_f1': 0.725990243192078, 'eval_runtime': 0.5295, 'eval_samples_per_second': 215.305, 'eval_steps_per_second': 15.109, 'epoch': 25.47}


 52%|█████▏    | 2750/5300 [06:40<05:41,  7.48it/s]The following columns in the evaluation set  don't have a corresponding argument in `RobertaAdapterModel.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 0.1755, 'learning_rate': 4.811320754716982e-05, 'epoch': 25.94}


                                                   
 52%|█████▏    | 2751/5300 [06:41<12:34,  3.38it/s]

{'eval_loss': 1.219325304031372, 'eval_f1': 0.6296584442810857, 'eval_runtime': 0.5085, 'eval_samples_per_second': 224.206, 'eval_steps_per_second': 15.734, 'epoch': 25.94}


 53%|█████▎    | 2800/5300 [06:47<05:21,  7.78it/s]The following columns in the evaluation set  don't have a corresponding argument in `RobertaAdapterModel.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 0.142, 'learning_rate': 4.716981132075472e-05, 'epoch': 26.42}


                                                   
 53%|█████▎    | 2800/5300 [06:48<05:21,  7.78it/s]Saving model checkpoint to ./training_output/finetuning/No_Pretrain\checkpoint-2800
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-2800\citation_intent_0\adapter_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-2800\citation_intent_0\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-2800\citation_intent_0\head_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-2800\citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-2800\citation_intent_0\head_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-2800\citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-2800\citation_intent_0\head

{'eval_loss': 1.1226781606674194, 'eval_f1': 0.6136137254558307, 'eval_runtime': 0.4794, 'eval_samples_per_second': 237.78, 'eval_steps_per_second': 16.686, 'epoch': 26.42}


 54%|█████▍    | 2850/5300 [06:54<05:14,  7.80it/s]The following columns in the evaluation set  don't have a corresponding argument in `RobertaAdapterModel.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 0.1481, 'learning_rate': 4.6226415094339625e-05, 'epoch': 26.89}


                                                   
 54%|█████▍    | 2851/5300 [06:55<11:20,  3.60it/s]

{'eval_loss': 1.0119484663009644, 'eval_f1': 0.6494177851320708, 'eval_runtime': 0.4794, 'eval_samples_per_second': 237.78, 'eval_steps_per_second': 16.686, 'epoch': 26.89}


 55%|█████▍    | 2900/5300 [07:01<05:08,  7.78it/s]The following columns in the evaluation set  don't have a corresponding argument in `RobertaAdapterModel.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 0.1321, 'learning_rate': 4.528301886792453e-05, 'epoch': 27.36}


                                                   
 55%|█████▍    | 2900/5300 [07:02<05:08,  7.78it/s]Saving model checkpoint to ./training_output/finetuning/No_Pretrain\checkpoint-2900
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-2900\citation_intent_0\adapter_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-2900\citation_intent_0\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-2900\citation_intent_0\head_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-2900\citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-2900\citation_intent_0\head_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-2900\citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-2900\citation_intent_0\head

{'eval_loss': 1.0287381410598755, 'eval_f1': 0.6301332015526389, 'eval_runtime': 0.4814, 'eval_samples_per_second': 236.791, 'eval_steps_per_second': 16.617, 'epoch': 27.36}


 56%|█████▌    | 2950/5300 [07:08<05:02,  7.77it/s]The following columns in the evaluation set  don't have a corresponding argument in `RobertaAdapterModel.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 0.1346, 'learning_rate': 4.433962264150944e-05, 'epoch': 27.83}


                                                   
 56%|█████▌    | 2951/5300 [07:09<11:19,  3.45it/s]

{'eval_loss': 1.266907811164856, 'eval_f1': 0.6122342018568433, 'eval_runtime': 0.5105, 'eval_samples_per_second': 223.327, 'eval_steps_per_second': 15.672, 'epoch': 27.83}


 57%|█████▋    | 3000/5300 [07:16<05:37,  6.82it/s]The following columns in the evaluation set  don't have a corresponding argument in `RobertaAdapterModel.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 0.1419, 'learning_rate': 4.3396226415094345e-05, 'epoch': 28.3}


                                                   
 57%|█████▋    | 3000/5300 [07:17<05:37,  6.82it/s]Saving model checkpoint to ./training_output/finetuning/No_Pretrain\checkpoint-3000
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-3000\citation_intent_0\adapter_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-3000\citation_intent_0\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-3000\citation_intent_0\head_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-3000\citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-3000\citation_intent_0\head_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-3000\citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-3000\citation_intent_0\head

{'eval_loss': 1.26505708694458, 'eval_f1': 0.6435037585426042, 'eval_runtime': 0.5495, 'eval_samples_per_second': 207.462, 'eval_steps_per_second': 14.559, 'epoch': 28.3}


 58%|█████▊    | 3050/5300 [07:24<05:28,  6.85it/s]The following columns in the evaluation set  don't have a corresponding argument in `RobertaAdapterModel.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 0.0989, 'learning_rate': 4.245283018867925e-05, 'epoch': 28.77}


                                                   
 58%|█████▊    | 3051/5300 [07:25<11:52,  3.16it/s]

{'eval_loss': 1.0831676721572876, 'eval_f1': 0.6664647556635387, 'eval_runtime': 0.5485, 'eval_samples_per_second': 207.841, 'eval_steps_per_second': 14.585, 'epoch': 28.77}


 58%|█████▊    | 3100/5300 [07:32<05:20,  6.87it/s]The following columns in the evaluation set  don't have a corresponding argument in `RobertaAdapterModel.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 0.1377, 'learning_rate': 4.150943396226415e-05, 'epoch': 29.25}


                                                   
 58%|█████▊    | 3100/5300 [07:32<05:20,  6.87it/s]Saving model checkpoint to ./training_output/finetuning/No_Pretrain\checkpoint-3100
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-3100\citation_intent_0\adapter_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-3100\citation_intent_0\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-3100\citation_intent_0\head_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-3100\citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-3100\citation_intent_0\head_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-3100\citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-3100\citation_intent_0\head

{'eval_loss': 1.1421757936477661, 'eval_f1': 0.6582479386739021, 'eval_runtime': 0.5495, 'eval_samples_per_second': 207.462, 'eval_steps_per_second': 14.559, 'epoch': 29.25}


 59%|█████▉    | 3150/5300 [07:40<05:14,  6.83it/s]The following columns in the evaluation set  don't have a corresponding argument in `RobertaAdapterModel.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 0.1286, 'learning_rate': 4.0566037735849064e-05, 'epoch': 29.72}


                                                   
 59%|█████▉    | 3151/5300 [07:40<11:23,  3.14it/s]

{'eval_loss': 1.2083238363265991, 'eval_f1': 0.6422031196818693, 'eval_runtime': 0.5495, 'eval_samples_per_second': 207.462, 'eval_steps_per_second': 14.559, 'epoch': 29.72}


 60%|██████    | 3200/5300 [07:48<05:07,  6.83it/s]The following columns in the evaluation set  don't have a corresponding argument in `RobertaAdapterModel.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 0.0755, 'learning_rate': 3.962264150943397e-05, 'epoch': 30.19}


                                                   
 60%|██████    | 3200/5300 [07:48<05:07,  6.83it/s]Saving model checkpoint to ./training_output/finetuning/No_Pretrain\checkpoint-3200
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-3200\citation_intent_0\adapter_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-3200\citation_intent_0\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-3200\citation_intent_0\head_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-3200\citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-3200\citation_intent_0\head_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-3200\citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-3200\citation_intent_0\head

{'eval_loss': 1.0367951393127441, 'eval_f1': 0.6775802000307056, 'eval_runtime': 0.5495, 'eval_samples_per_second': 207.462, 'eval_steps_per_second': 14.559, 'epoch': 30.19}


 61%|██████▏   | 3250/5300 [07:56<05:00,  6.83it/s]The following columns in the evaluation set  don't have a corresponding argument in `RobertaAdapterModel.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 0.092, 'learning_rate': 3.867924528301887e-05, 'epoch': 30.66}


                                                   
 61%|██████▏   | 3251/5300 [07:56<10:52,  3.14it/s]

{'eval_loss': 1.1779292821884155, 'eval_f1': 0.6707232442526561, 'eval_runtime': 0.5515, 'eval_samples_per_second': 206.709, 'eval_steps_per_second': 14.506, 'epoch': 30.66}


 62%|██████▏   | 3300/5300 [08:03<04:52,  6.83it/s]The following columns in the evaluation set  don't have a corresponding argument in `RobertaAdapterModel.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 0.1258, 'learning_rate': 3.7735849056603776e-05, 'epoch': 31.13}


                                                   
 62%|██████▏   | 3300/5300 [08:04<04:52,  6.83it/s]Saving model checkpoint to ./training_output/finetuning/No_Pretrain\checkpoint-3300
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-3300\citation_intent_0\adapter_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-3300\citation_intent_0\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-3300\citation_intent_0\head_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-3300\citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-3300\citation_intent_0\head_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-3300\citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-3300\citation_intent_0\head

{'eval_loss': 1.087796688079834, 'eval_f1': 0.6571231056525174, 'eval_runtime': 0.5485, 'eval_samples_per_second': 207.841, 'eval_steps_per_second': 14.585, 'epoch': 31.13}


 63%|██████▎   | 3350/5300 [08:11<04:45,  6.83it/s]The following columns in the evaluation set  don't have a corresponding argument in `RobertaAdapterModel.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 0.1258, 'learning_rate': 3.679245283018868e-05, 'epoch': 31.6}


                                                   
 63%|██████▎   | 3351/5300 [08:12<10:23,  3.13it/s]

{'eval_loss': 1.153594970703125, 'eval_f1': 0.6241020778631398, 'eval_runtime': 0.5535, 'eval_samples_per_second': 205.961, 'eval_steps_per_second': 14.453, 'epoch': 31.6}


 64%|██████▍   | 3400/5300 [08:19<04:35,  6.89it/s]The following columns in the evaluation set  don't have a corresponding argument in `RobertaAdapterModel.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 0.0709, 'learning_rate': 3.5849056603773584e-05, 'epoch': 32.08}


                                                   
 64%|██████▍   | 3400/5300 [08:20<04:35,  6.89it/s]Saving model checkpoint to ./training_output/finetuning/No_Pretrain\checkpoint-3400
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-3400\citation_intent_0\adapter_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-3400\citation_intent_0\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-3400\citation_intent_0\head_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-3400\citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-3400\citation_intent_0\head_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-3400\citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-3400\citation_intent_0\head

{'eval_loss': 1.0963863134384155, 'eval_f1': 0.653753907072491, 'eval_runtime': 0.5465, 'eval_samples_per_second': 208.602, 'eval_steps_per_second': 14.639, 'epoch': 32.08}


 65%|██████▌   | 3450/5300 [08:27<04:31,  6.82it/s]The following columns in the evaluation set  don't have a corresponding argument in `RobertaAdapterModel.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 0.0711, 'learning_rate': 3.490566037735849e-05, 'epoch': 32.55}


                                                   
 65%|██████▌   | 3451/5300 [08:28<09:47,  3.15it/s]

{'eval_loss': 1.135650396347046, 'eval_f1': 0.6416321601104209, 'eval_runtime': 0.5485, 'eval_samples_per_second': 207.841, 'eval_steps_per_second': 14.585, 'epoch': 32.55}


 66%|██████▌   | 3500/5300 [08:35<04:03,  7.39it/s]The following columns in the evaluation set  don't have a corresponding argument in `RobertaAdapterModel.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 0.1158, 'learning_rate': 3.39622641509434e-05, 'epoch': 33.02}


                                                   
 66%|██████▌   | 3500/5300 [08:36<04:03,  7.39it/s]Saving model checkpoint to ./training_output/finetuning/No_Pretrain\checkpoint-3500
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-3500\citation_intent_0\adapter_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-3500\citation_intent_0\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-3500\citation_intent_0\head_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-3500\citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-3500\citation_intent_0\head_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-3500\citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-3500\citation_intent_0\head

{'eval_loss': 1.2184367179870605, 'eval_f1': 0.6392709511130564, 'eval_runtime': 0.5495, 'eval_samples_per_second': 207.462, 'eval_steps_per_second': 14.559, 'epoch': 33.02}


 67%|██████▋   | 3550/5300 [08:43<04:17,  6.80it/s]The following columns in the evaluation set  don't have a corresponding argument in `RobertaAdapterModel.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 0.0736, 'learning_rate': 3.30188679245283e-05, 'epoch': 33.49}


                                                   
 67%|██████▋   | 3551/5300 [08:44<09:17,  3.14it/s]

{'eval_loss': 1.214106798171997, 'eval_f1': 0.6107320919820919, 'eval_runtime': 0.5495, 'eval_samples_per_second': 207.462, 'eval_steps_per_second': 14.559, 'epoch': 33.49}


 68%|██████▊   | 3600/5300 [08:51<04:07,  6.86it/s]The following columns in the evaluation set  don't have a corresponding argument in `RobertaAdapterModel.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 0.1044, 'learning_rate': 3.207547169811321e-05, 'epoch': 33.96}


                                                   
 68%|██████▊   | 3600/5300 [08:51<04:07,  6.86it/s]Saving model checkpoint to ./training_output/finetuning/No_Pretrain\checkpoint-3600
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-3600\citation_intent_0\adapter_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-3600\citation_intent_0\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-3600\citation_intent_0\head_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-3600\citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-3600\citation_intent_0\head_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-3600\citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-3600\citation_intent_0\head

{'eval_loss': 1.0801771879196167, 'eval_f1': 0.644785443314855, 'eval_runtime': 0.5485, 'eval_samples_per_second': 207.84, 'eval_steps_per_second': 14.585, 'epoch': 33.96}


 69%|██████▉   | 3650/5300 [08:59<04:01,  6.83it/s]The following columns in the evaluation set  don't have a corresponding argument in `RobertaAdapterModel.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 0.0868, 'learning_rate': 3.113207547169811e-05, 'epoch': 34.43}


                                                   
 69%|██████▉   | 3651/5300 [09:00<08:43,  3.15it/s]

{'eval_loss': 1.1003069877624512, 'eval_f1': 0.6368863541150931, 'eval_runtime': 0.5485, 'eval_samples_per_second': 207.841, 'eval_steps_per_second': 14.585, 'epoch': 34.43}


 70%|██████▉   | 3700/5300 [09:07<03:56,  6.77it/s]The following columns in the evaluation set  don't have a corresponding argument in `RobertaAdapterModel.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 0.0892, 'learning_rate': 3.018867924528302e-05, 'epoch': 34.91}


                                                   
 70%|██████▉   | 3700/5300 [09:07<03:56,  6.77it/s]Saving model checkpoint to ./training_output/finetuning/No_Pretrain\checkpoint-3700
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-3700\citation_intent_0\adapter_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-3700\citation_intent_0\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-3700\citation_intent_0\head_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-3700\citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-3700\citation_intent_0\head_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-3700\citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-3700\citation_intent_0\head

{'eval_loss': 1.1045749187469482, 'eval_f1': 0.6810524672403652, 'eval_runtime': 0.5435, 'eval_samples_per_second': 209.754, 'eval_steps_per_second': 14.72, 'epoch': 34.91}


 71%|███████   | 3750/5300 [09:15<03:46,  6.83it/s]The following columns in the evaluation set  don't have a corresponding argument in `RobertaAdapterModel.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 0.0979, 'learning_rate': 2.9245283018867926e-05, 'epoch': 35.38}


                                                   
 71%|███████   | 3751/5300 [09:15<08:15,  3.13it/s]

{'eval_loss': 1.2641587257385254, 'eval_f1': 0.6482114467408585, 'eval_runtime': 0.5565, 'eval_samples_per_second': 204.85, 'eval_steps_per_second': 14.375, 'epoch': 35.38}


 72%|███████▏  | 3800/5300 [09:23<03:41,  6.79it/s]The following columns in the evaluation set  don't have a corresponding argument in `RobertaAdapterModel.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 0.0947, 'learning_rate': 2.830188679245283e-05, 'epoch': 35.85}


                                                   
 72%|███████▏  | 3800/5300 [09:23<03:41,  6.79it/s]Saving model checkpoint to ./training_output/finetuning/No_Pretrain\checkpoint-3800
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-3800\citation_intent_0\adapter_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-3800\citation_intent_0\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-3800\citation_intent_0\head_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-3800\citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-3800\citation_intent_0\head_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-3800\citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-3800\citation_intent_0\head

{'eval_loss': 1.3703176975250244, 'eval_f1': 0.6062210777728019, 'eval_runtime': 0.5665, 'eval_samples_per_second': 201.231, 'eval_steps_per_second': 14.121, 'epoch': 35.85}


 73%|███████▎  | 3850/5300 [09:31<03:31,  6.86it/s]The following columns in the evaluation set  don't have a corresponding argument in `RobertaAdapterModel.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 0.0407, 'learning_rate': 2.7358490566037738e-05, 'epoch': 36.32}


                                                   
 73%|███████▎  | 3851/5300 [09:31<07:36,  3.18it/s]

{'eval_loss': 1.2504031658172607, 'eval_f1': 0.6426773412067529, 'eval_runtime': 0.5435, 'eval_samples_per_second': 209.754, 'eval_steps_per_second': 14.72, 'epoch': 36.32}


 74%|███████▎  | 3900/5300 [09:39<03:25,  6.82it/s]The following columns in the evaluation set  don't have a corresponding argument in `RobertaAdapterModel.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 0.1164, 'learning_rate': 2.641509433962264e-05, 'epoch': 36.79}


                                                   
 74%|███████▎  | 3900/5300 [09:39<03:25,  6.82it/s]Saving model checkpoint to ./training_output/finetuning/No_Pretrain\checkpoint-3900
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-3900\citation_intent_0\adapter_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-3900\citation_intent_0\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-3900\citation_intent_0\head_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-3900\citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-3900\citation_intent_0\head_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-3900\citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-3900\citation_intent_0\head

{'eval_loss': 1.1317627429962158, 'eval_f1': 0.6297750724221313, 'eval_runtime': 0.5445, 'eval_samples_per_second': 209.369, 'eval_steps_per_second': 14.693, 'epoch': 36.79}


 75%|███████▍  | 3950/5300 [09:46<03:11,  7.05it/s]The following columns in the evaluation set  don't have a corresponding argument in `RobertaAdapterModel.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 0.0897, 'learning_rate': 2.547169811320755e-05, 'epoch': 37.26}


                                                   
 75%|███████▍  | 3951/5300 [09:47<06:54,  3.25it/s]

{'eval_loss': 1.1816195249557495, 'eval_f1': 0.6211827503862901, 'eval_runtime': 0.5315, 'eval_samples_per_second': 214.495, 'eval_steps_per_second': 15.052, 'epoch': 37.26}


 75%|███████▌  | 4000/5300 [09:54<03:10,  6.82it/s]The following columns in the evaluation set  don't have a corresponding argument in `RobertaAdapterModel.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 0.0718, 'learning_rate': 2.4528301886792453e-05, 'epoch': 37.74}


                                                   
 75%|███████▌  | 4000/5300 [09:55<03:10,  6.82it/s]Saving model checkpoint to ./training_output/finetuning/No_Pretrain\checkpoint-4000
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-4000\citation_intent_0\adapter_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-4000\citation_intent_0\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-4000\citation_intent_0\head_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-4000\citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-4000\citation_intent_0\head_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-4000\citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-4000\citation_intent_0\head

{'eval_loss': 1.3358993530273438, 'eval_f1': 0.622075777501741, 'eval_runtime': 0.5435, 'eval_samples_per_second': 209.754, 'eval_steps_per_second': 14.72, 'epoch': 37.74}


 76%|███████▋  | 4050/5300 [10:02<03:02,  6.85it/s]The following columns in the evaluation set  don't have a corresponding argument in `RobertaAdapterModel.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 0.0707, 'learning_rate': 2.358490566037736e-05, 'epoch': 38.21}


                                                   
 76%|███████▋  | 4051/5300 [10:03<06:32,  3.18it/s]

{'eval_loss': 1.1493048667907715, 'eval_f1': 0.7211630266141543, 'eval_runtime': 0.5415, 'eval_samples_per_second': 210.53, 'eval_steps_per_second': 14.774, 'epoch': 38.21}


 77%|███████▋  | 4100/5300 [10:10<02:51,  6.99it/s]The following columns in the evaluation set  don't have a corresponding argument in `RobertaAdapterModel.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 0.0969, 'learning_rate': 2.2641509433962265e-05, 'epoch': 38.68}


                                                   
 77%|███████▋  | 4100/5300 [10:10<02:51,  6.99it/s]Saving model checkpoint to ./training_output/finetuning/No_Pretrain\checkpoint-4100
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-4100\citation_intent_0\adapter_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-4100\citation_intent_0\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-4100\citation_intent_0\head_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-4100\citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-4100\citation_intent_0\head_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-4100\citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-4100\citation_intent_0\head

{'eval_loss': 1.1858341693878174, 'eval_f1': 0.6308542356336474, 'eval_runtime': 0.5315, 'eval_samples_per_second': 214.495, 'eval_steps_per_second': 15.052, 'epoch': 38.68}


 78%|███████▊  | 4150/5300 [10:18<02:51,  6.70it/s]The following columns in the evaluation set  don't have a corresponding argument in `RobertaAdapterModel.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 0.0593, 'learning_rate': 2.1698113207547172e-05, 'epoch': 39.15}


                                                   
 78%|███████▊  | 4151/5300 [10:18<06:07,  3.13it/s]

{'eval_loss': 1.192119836807251, 'eval_f1': 0.6160240202275601, 'eval_runtime': 0.5505, 'eval_samples_per_second': 207.085, 'eval_steps_per_second': 14.532, 'epoch': 39.15}


 79%|███████▉  | 4200/5300 [10:26<02:42,  6.76it/s]The following columns in the evaluation set  don't have a corresponding argument in `RobertaAdapterModel.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 0.0706, 'learning_rate': 2.0754716981132076e-05, 'epoch': 39.62}


                                                   
 79%|███████▉  | 4200/5300 [10:26<02:42,  6.76it/s]Saving model checkpoint to ./training_output/finetuning/No_Pretrain\checkpoint-4200
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-4200\citation_intent_0\adapter_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-4200\citation_intent_0\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-4200\citation_intent_0\head_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-4200\citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-4200\citation_intent_0\head_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-4200\citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-4200\citation_intent_0\head

{'eval_loss': 1.3666257858276367, 'eval_f1': 0.6285731468394007, 'eval_runtime': 0.5525, 'eval_samples_per_second': 206.335, 'eval_steps_per_second': 14.48, 'epoch': 39.62}


 80%|████████  | 4250/5300 [10:34<02:34,  6.80it/s]The following columns in the evaluation set  don't have a corresponding argument in `RobertaAdapterModel.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 0.0558, 'learning_rate': 1.9811320754716984e-05, 'epoch': 40.09}


                                                   
 80%|████████  | 4251/5300 [10:34<05:37,  3.11it/s]

{'eval_loss': 1.3154703378677368, 'eval_f1': 0.6329651102555839, 'eval_runtime': 0.5605, 'eval_samples_per_second': 203.387, 'eval_steps_per_second': 14.273, 'epoch': 40.09}


 81%|████████  | 4300/5300 [10:42<02:26,  6.85it/s]The following columns in the evaluation set  don't have a corresponding argument in `RobertaAdapterModel.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 0.0853, 'learning_rate': 1.8867924528301888e-05, 'epoch': 40.57}


                                                   
 81%|████████  | 4300/5300 [10:42<02:26,  6.85it/s]Saving model checkpoint to ./training_output/finetuning/No_Pretrain\checkpoint-4300
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-4300\citation_intent_0\adapter_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-4300\citation_intent_0\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-4300\citation_intent_0\head_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-4300\citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-4300\citation_intent_0\head_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-4300\citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-4300\citation_intent_0\head

{'eval_loss': 1.3435299396514893, 'eval_f1': 0.6329651102555839, 'eval_runtime': 0.5425, 'eval_samples_per_second': 210.141, 'eval_steps_per_second': 14.747, 'epoch': 40.57}


 82%|████████▏ | 4350/5300 [10:49<02:12,  7.16it/s]The following columns in the evaluation set  don't have a corresponding argument in `RobertaAdapterModel.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 0.1002, 'learning_rate': 1.7924528301886792e-05, 'epoch': 41.04}


                                                   
 82%|████████▏ | 4351/5300 [10:50<04:45,  3.33it/s]

{'eval_loss': 1.2734856605529785, 'eval_f1': 0.6145506028498355, 'eval_runtime': 0.5505, 'eval_samples_per_second': 207.085, 'eval_steps_per_second': 14.532, 'epoch': 41.04}


 83%|████████▎ | 4400/5300 [10:57<02:14,  6.72it/s]The following columns in the evaluation set  don't have a corresponding argument in `RobertaAdapterModel.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 0.0825, 'learning_rate': 1.69811320754717e-05, 'epoch': 41.51}


                                                   
 83%|████████▎ | 4400/5300 [10:58<02:14,  6.72it/s]Saving model checkpoint to ./training_output/finetuning/No_Pretrain\checkpoint-4400
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-4400\citation_intent_0\adapter_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-4400\citation_intent_0\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-4400\citation_intent_0\head_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-4400\citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-4400\citation_intent_0\head_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-4400\citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-4400\citation_intent_0\head

{'eval_loss': 1.3025991916656494, 'eval_f1': 0.6329651102555839, 'eval_runtime': 0.5455, 'eval_samples_per_second': 208.985, 'eval_steps_per_second': 14.666, 'epoch': 41.51}


 84%|████████▍ | 4450/5300 [11:05<02:06,  6.71it/s]The following columns in the evaluation set  don't have a corresponding argument in `RobertaAdapterModel.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 0.0626, 'learning_rate': 1.6037735849056604e-05, 'epoch': 41.98}


                                                   
 84%|████████▍ | 4451/5300 [11:06<04:33,  3.10it/s]

{'eval_loss': 1.2447657585144043, 'eval_f1': 0.6440274977885597, 'eval_runtime': 0.5585, 'eval_samples_per_second': 204.116, 'eval_steps_per_second': 14.324, 'epoch': 41.98}


 85%|████████▍ | 4500/5300 [11:13<01:57,  6.83it/s]The following columns in the evaluation set  don't have a corresponding argument in `RobertaAdapterModel.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 0.0833, 'learning_rate': 1.509433962264151e-05, 'epoch': 42.45}


                                                   
 85%|████████▍ | 4500/5300 [11:14<01:57,  6.83it/s]Saving model checkpoint to ./training_output/finetuning/No_Pretrain\checkpoint-4500
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-4500\citation_intent_0\adapter_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-4500\citation_intent_0\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-4500\citation_intent_0\head_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-4500\citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-4500\citation_intent_0\head_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-4500\citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-4500\citation_intent_0\head

{'eval_loss': 1.1745134592056274, 'eval_f1': 0.7261451511451512, 'eval_runtime': 0.5475, 'eval_samples_per_second': 208.221, 'eval_steps_per_second': 14.612, 'epoch': 42.45}


 86%|████████▌ | 4550/5300 [11:21<01:49,  6.88it/s]The following columns in the evaluation set  don't have a corresponding argument in `RobertaAdapterModel.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 0.0548, 'learning_rate': 1.4150943396226415e-05, 'epoch': 42.92}


                                                   
 86%|████████▌ | 4551/5300 [11:22<03:56,  3.17it/s]

{'eval_loss': 1.190550684928894, 'eval_f1': 0.633462225469898, 'eval_runtime': 0.5425, 'eval_samples_per_second': 210.141, 'eval_steps_per_second': 14.747, 'epoch': 42.92}


 87%|████████▋ | 4600/5300 [11:29<01:42,  6.85it/s]The following columns in the evaluation set  don't have a corresponding argument in `RobertaAdapterModel.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 0.1009, 'learning_rate': 1.320754716981132e-05, 'epoch': 43.4}


                                                   
 87%|████████▋ | 4600/5300 [11:30<01:42,  6.85it/s]Saving model checkpoint to ./training_output/finetuning/No_Pretrain\checkpoint-4600
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-4600\citation_intent_0\adapter_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-4600\citation_intent_0\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-4600\citation_intent_0\head_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-4600\citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-4600\citation_intent_0\head_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-4600\citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-4600\citation_intent_0\head

{'eval_loss': 1.2144525051116943, 'eval_f1': 0.6619576335424714, 'eval_runtime': 0.5435, 'eval_samples_per_second': 209.754, 'eval_steps_per_second': 14.72, 'epoch': 43.4}


 88%|████████▊ | 4650/5300 [11:37<01:34,  6.86it/s]The following columns in the evaluation set  don't have a corresponding argument in `RobertaAdapterModel.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 0.0621, 'learning_rate': 1.2264150943396227e-05, 'epoch': 43.87}


                                                   
 88%|████████▊ | 4651/5300 [11:38<03:24,  3.18it/s]

{'eval_loss': 1.3225982189178467, 'eval_f1': 0.6402353387647506, 'eval_runtime': 0.5435, 'eval_samples_per_second': 209.754, 'eval_steps_per_second': 14.72, 'epoch': 43.87}


 89%|████████▊ | 4700/5300 [11:45<01:27,  6.83it/s]The following columns in the evaluation set  don't have a corresponding argument in `RobertaAdapterModel.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 0.0598, 'learning_rate': 1.1320754716981132e-05, 'epoch': 44.34}


                                                   
 89%|████████▊ | 4700/5300 [11:45<01:27,  6.83it/s]Saving model checkpoint to ./training_output/finetuning/No_Pretrain\checkpoint-4700
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-4700\citation_intent_0\adapter_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-4700\citation_intent_0\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-4700\citation_intent_0\head_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-4700\citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-4700\citation_intent_0\head_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-4700\citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-4700\citation_intent_0\head

{'eval_loss': 1.3684161901474, 'eval_f1': 0.6764074999369116, 'eval_runtime': 0.5415, 'eval_samples_per_second': 210.53, 'eval_steps_per_second': 14.774, 'epoch': 44.34}


 90%|████████▉ | 4750/5300 [11:53<01:20,  6.82it/s]The following columns in the evaluation set  don't have a corresponding argument in `RobertaAdapterModel.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 0.0704, 'learning_rate': 1.0377358490566038e-05, 'epoch': 44.81}


                                                   
 90%|████████▉ | 4751/5300 [11:53<02:54,  3.15it/s]

{'eval_loss': 1.2909795045852661, 'eval_f1': 0.6582479386739021, 'eval_runtime': 0.5495, 'eval_samples_per_second': 207.462, 'eval_steps_per_second': 14.559, 'epoch': 44.81}


 91%|█████████ | 4800/5300 [12:01<01:13,  6.82it/s]The following columns in the evaluation set  don't have a corresponding argument in `RobertaAdapterModel.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 0.0601, 'learning_rate': 9.433962264150944e-06, 'epoch': 45.28}


                                                   
 91%|█████████ | 4800/5300 [12:01<01:13,  6.82it/s]Saving model checkpoint to ./training_output/finetuning/No_Pretrain\checkpoint-4800
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-4800\citation_intent_0\adapter_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-4800\citation_intent_0\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-4800\citation_intent_0\head_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-4800\citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-4800\citation_intent_0\head_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-4800\citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-4800\citation_intent_0\head

{'eval_loss': 1.3222813606262207, 'eval_f1': 0.6862990810359232, 'eval_runtime': 0.5505, 'eval_samples_per_second': 207.085, 'eval_steps_per_second': 14.532, 'epoch': 45.28}


 92%|█████████▏| 4850/5300 [12:08<01:05,  6.86it/s]The following columns in the evaluation set  don't have a corresponding argument in `RobertaAdapterModel.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 0.0769, 'learning_rate': 8.49056603773585e-06, 'epoch': 45.75}


                                                   
 92%|█████████▏| 4851/5300 [12:09<02:21,  3.17it/s]

{'eval_loss': 1.2374166250228882, 'eval_f1': 0.6300849838460457, 'eval_runtime': 0.5425, 'eval_samples_per_second': 210.141, 'eval_steps_per_second': 14.747, 'epoch': 45.75}


 92%|█████████▏| 4900/5300 [12:16<00:58,  6.86it/s]The following columns in the evaluation set  don't have a corresponding argument in `RobertaAdapterModel.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 0.0441, 'learning_rate': 7.547169811320755e-06, 'epoch': 46.23}


                                                   
 92%|█████████▏| 4900/5300 [12:17<00:58,  6.86it/s]Saving model checkpoint to ./training_output/finetuning/No_Pretrain\checkpoint-4900
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-4900\citation_intent_0\adapter_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-4900\citation_intent_0\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-4900\citation_intent_0\head_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-4900\citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-4900\citation_intent_0\head_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-4900\citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-4900\citation_intent_0\head

{'eval_loss': 1.2612483501434326, 'eval_f1': 0.6538617471280009, 'eval_runtime': 0.5445, 'eval_samples_per_second': 209.369, 'eval_steps_per_second': 14.693, 'epoch': 46.23}


 93%|█████████▎| 4950/5300 [12:24<00:51,  6.85it/s]The following columns in the evaluation set  don't have a corresponding argument in `RobertaAdapterModel.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 0.0574, 'learning_rate': 6.60377358490566e-06, 'epoch': 46.7}


                                                   
 93%|█████████▎| 4951/5300 [12:25<01:50,  3.17it/s]

{'eval_loss': 1.226842999458313, 'eval_f1': 0.6440274977885597, 'eval_runtime': 0.5435, 'eval_samples_per_second': 209.754, 'eval_steps_per_second': 14.72, 'epoch': 46.7}


 94%|█████████▍| 5000/5300 [12:32<00:43,  6.84it/s]The following columns in the evaluation set  don't have a corresponding argument in `RobertaAdapterModel.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 0.0737, 'learning_rate': 5.660377358490566e-06, 'epoch': 47.17}


                                                   
 94%|█████████▍| 5000/5300 [12:33<00:43,  6.84it/s]Saving model checkpoint to ./training_output/finetuning/No_Pretrain\checkpoint-5000
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-5000\citation_intent_0\adapter_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-5000\citation_intent_0\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-5000\citation_intent_0\head_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-5000\citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-5000\citation_intent_0\head_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-5000\citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-5000\citation_intent_0\head

{'eval_loss': 1.3280787467956543, 'eval_f1': 0.6329651102555839, 'eval_runtime': 0.5395, 'eval_samples_per_second': 211.311, 'eval_steps_per_second': 14.829, 'epoch': 47.17}


 95%|█████████▌| 5050/5300 [12:40<00:36,  6.82it/s]The following columns in the evaluation set  don't have a corresponding argument in `RobertaAdapterModel.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 0.0732, 'learning_rate': 4.716981132075472e-06, 'epoch': 47.64}


                                                   
 95%|█████████▌| 5051/5300 [12:41<01:18,  3.17it/s]

{'eval_loss': 1.3249869346618652, 'eval_f1': 0.6329651102555839, 'eval_runtime': 0.5405, 'eval_samples_per_second': 210.92, 'eval_steps_per_second': 14.801, 'epoch': 47.64}


 96%|█████████▌| 5100/5300 [12:48<00:29,  6.78it/s]The following columns in the evaluation set  don't have a corresponding argument in `RobertaAdapterModel.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 0.0534, 'learning_rate': 3.7735849056603773e-06, 'epoch': 48.11}


                                                   
 96%|█████████▌| 5100/5300 [12:48<00:29,  6.78it/s]Saving model checkpoint to ./training_output/finetuning/No_Pretrain\checkpoint-5100
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-5100\citation_intent_0\adapter_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-5100\citation_intent_0\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-5100\citation_intent_0\head_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-5100\citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-5100\citation_intent_0\head_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-5100\citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-5100\citation_intent_0\head

{'eval_loss': 1.3264920711517334, 'eval_f1': 0.6329651102555839, 'eval_runtime': 0.5515, 'eval_samples_per_second': 206.709, 'eval_steps_per_second': 14.506, 'epoch': 48.11}


 97%|█████████▋| 5150/5300 [12:56<00:22,  6.81it/s]The following columns in the evaluation set  don't have a corresponding argument in `RobertaAdapterModel.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 0.0668, 'learning_rate': 2.830188679245283e-06, 'epoch': 48.58}


                                                   
 97%|█████████▋| 5151/5300 [12:56<00:46,  3.17it/s]

{'eval_loss': 1.3360289335250854, 'eval_f1': 0.6329651102555839, 'eval_runtime': 0.5385, 'eval_samples_per_second': 211.704, 'eval_steps_per_second': 14.856, 'epoch': 48.58}


 98%|█████████▊| 5200/5300 [13:04<00:14,  7.02it/s]The following columns in the evaluation set  don't have a corresponding argument in `RobertaAdapterModel.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 0.0514, 'learning_rate': 1.8867924528301887e-06, 'epoch': 49.06}


                                                   
 98%|█████████▊| 5200/5300 [13:04<00:14,  7.02it/s]Saving model checkpoint to ./training_output/finetuning/No_Pretrain\checkpoint-5200
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-5200\citation_intent_0\adapter_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-5200\citation_intent_0\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-5200\citation_intent_0\head_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-5200\citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-5200\citation_intent_0\head_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-5200\citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-5200\citation_intent_0\head

{'eval_loss': 1.3263249397277832, 'eval_f1': 0.6329651102555839, 'eval_runtime': 0.5405, 'eval_samples_per_second': 210.92, 'eval_steps_per_second': 14.801, 'epoch': 49.06}


 99%|█████████▉| 5250/5300 [13:11<00:07,  6.86it/s]The following columns in the evaluation set  don't have a corresponding argument in `RobertaAdapterModel.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 0.0577, 'learning_rate': 9.433962264150943e-07, 'epoch': 49.53}


                                                   
 99%|█████████▉| 5251/5300 [13:12<00:15,  3.19it/s]

{'eval_loss': 1.3470484018325806, 'eval_f1': 0.6329651102555839, 'eval_runtime': 0.5375, 'eval_samples_per_second': 212.098, 'eval_steps_per_second': 14.884, 'epoch': 49.53}


100%|██████████| 5300/5300 [13:19<00:00,  6.85it/s]The following columns in the evaluation set  don't have a corresponding argument in `RobertaAdapterModel.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 114
  Batch size = 16


{'loss': 0.0665, 'learning_rate': 0.0, 'epoch': 50.0}


                                                   
100%|██████████| 5300/5300 [13:20<00:00,  6.85it/s]Saving model checkpoint to ./training_output/finetuning/No_Pretrain\checkpoint-5300
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-5300\citation_intent_0\adapter_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-5300\citation_intent_0\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-5300\citation_intent_0\head_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-5300\citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-5300\citation_intent_0\head_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\checkpoint-5300\citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\checkpoint-5300\citation_intent_0\head

{'eval_loss': 1.3395038843154907, 'eval_f1': 0.6329651102555839, 'eval_runtime': 0.5435, 'eval_samples_per_second': 209.754, 'eval_steps_per_second': 14.72, 'epoch': 50.0}
{'train_runtime': 800.416, 'train_samples_per_second': 105.445, 'train_steps_per_second': 6.622, 'train_loss': 0.2910285633465029, 'epoch': 50.0}


100%|██████████| 9/9 [00:00<00:00, 15.21it/s]
Configuration saved in ./training_output/finetuning/No_Pretrain\citation_intent_0\adapter_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\citation_intent_0\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\citation_intent_0\head_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\citation_intent_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/No_Pretrain\citation_intent_0\head_config.json
Module weights saved in ./training_output/finetuning/No_Pretrain\citation_intent_0\pytorch_model_head.bin
