In [22]:
#Loading Libraries
# %conda install pytorch torchvision torchaudio cudatoolkit=11.3 -c pytorch
# %pip install -U adapter-transformers
# %conda install -y -c conda-forge tensorboard
# %pip install optuna
# %pip install tqdm
from tqdm.notebook import tqdm

In [1]:
# Loading dataset
from datasets import load_dataset

scierc_name = 'nsusemiehl/SciERC'
scierc_dataset = load_dataset(scierc_name)
print(scierc_dataset.num_rows)

Using custom data configuration nsusemiehl--SciERC-f57c64a52b9c80c0
Reusing dataset json (C:\Users\The Doctor\.cache\huggingface\datasets\json\nsusemiehl--SciERC-f57c64a52b9c80c0\0.0.0\ac0ca5f5289a6cf108e706efcf040422dbbfa8e658dee6a819f20d76bb84d26b)
100%|██████████| 3/3 [00:00<00:00, 999.12it/s]

{'train': 3219, 'test': 974, 'validation': 455}





In [2]:
scierc_dataset['train'][255]

{'text': 'We present two [[ methods ]] for capturing << nonstationary chaos >> , then present a few examples including biological signals , ocean waves and traffic flow .',
 'label': 'USED-FOR',
 'metadata': [3, 3, 6, 7]}

This block creates dataset for pretraining

In [3]:
from transformers import RobertaTokenizer

tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
# Tokenize the set for the transformer
def encode_batch_pretraining(batch):
  """Encodes a batch of input data using the model tokenizer."""
  return tokenizer(batch["text"], truncation=True, padding="max_length", max_length=128)

# Encode the input data
# NOTE: num_proc does not seem to work, for some reason it can't find the tokenizer
scierc_dataset_pretraining = scierc_dataset.map(encode_batch_pretraining, 
                                    batched=True, 
                                    remove_columns=scierc_dataset['train'].column_names, 
                                    )

# We make the labels the same as the input as this is language learning 
def add_labels(examples):
  examples["labels"] = examples["input_ids"].copy()
  return examples
  
scierc_dataset_pretraining = scierc_dataset_pretraining.map(add_labels, batched=True)
scierc_dataset_pretraining.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])



Loading cached processed dataset at C:\Users\The Doctor\.cache\huggingface\datasets\json\nsusemiehl--SciERC-f57c64a52b9c80c0\0.0.0\ac0ca5f5289a6cf108e706efcf040422dbbfa8e658dee6a819f20d76bb84d26b\cache-74c87c445e04c867.arrow
Loading cached processed dataset at C:\Users\The Doctor\.cache\huggingface\datasets\json\nsusemiehl--SciERC-f57c64a52b9c80c0\0.0.0\ac0ca5f5289a6cf108e706efcf040422dbbfa8e658dee6a819f20d76bb84d26b\cache-876e0d37d233bb91.arrow
Loading cached processed dataset at C:\Users\The Doctor\.cache\huggingface\datasets\json\nsusemiehl--SciERC-f57c64a52b9c80c0\0.0.0\ac0ca5f5289a6cf108e706efcf040422dbbfa8e658dee6a819f20d76bb84d26b\cache-ecd8f07822814f57.arrow
Loading cached processed dataset at C:\Users\The Doctor\.cache\huggingface\datasets\json\nsusemiehl--SciERC-f57c64a52b9c80c0\0.0.0\ac0ca5f5289a6cf108e706efcf040422dbbfa8e658dee6a819f20d76bb84d26b\cache-b972bbb5f00d5d6e.arrow
Loading cached processed dataset at C:\Users\The Doctor\.cache\huggingface\datasets\json\nsusemiehl-

In [4]:
# Collater adds padding in the form of EOS tokens, makes data augmentations of random masking ('mlm_probability)
from transformers import DataCollatorForLanguageModeling

tokenizer.pad_token = tokenizer.eos_token
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm_probability=0.15)

Here we are creating the dataset for task finetuning

In [5]:
# Finding the number of labels
import numpy as np
labels = np.unique(np.array(scierc_dataset['train']['label']))
num_of_labels = labels.size

print(labels)
print(num_of_labels)

['COMPARE' 'CONJUNCTION' 'EVALUATE-FOR' 'FEATURE-OF' 'HYPONYM-OF'
 'PART-OF' 'USED-FOR']
7


In [6]:
# encoding the labels
def encode_labels(dataset):
    for i in range(num_of_labels):
        if dataset['label'] == labels[i]:
            dataset['label'] = i
    return dataset

scierc_dataset = scierc_dataset.map(encode_labels)
scierc_dataset['train'][0]

Loading cached processed dataset at C:\Users\The Doctor\.cache\huggingface\datasets\json\nsusemiehl--SciERC-f57c64a52b9c80c0\0.0.0\ac0ca5f5289a6cf108e706efcf040422dbbfa8e658dee6a819f20d76bb84d26b\cache-3fa4decd4606a523.arrow
Loading cached processed dataset at C:\Users\The Doctor\.cache\huggingface\datasets\json\nsusemiehl--SciERC-f57c64a52b9c80c0\0.0.0\ac0ca5f5289a6cf108e706efcf040422dbbfa8e658dee6a819f20d76bb84d26b\cache-4d2e52dbe4cdbad6.arrow
Loading cached processed dataset at C:\Users\The Doctor\.cache\huggingface\datasets\json\nsusemiehl--SciERC-f57c64a52b9c80c0\0.0.0\ac0ca5f5289a6cf108e706efcf040422dbbfa8e658dee6a819f20d76bb84d26b\cache-214c6724dd02783d.arrow


{'text': 'The agreement in question involves number in [[ nouns ]] and << reflexive pronouns >> and is syntactic rather than semantic in nature because grammatical number in English , like grammatical gender in languages such as French , is partly arbitrary .',
 'label': 1,
 'metadata': [7, 7, 9, 10]}

In [7]:
def encode_batch_finetuning(batch):
  """Encodes a batch of input data using the model tokenizer."""
  return tokenizer(batch["text"], max_length=80, truncation=True, padding="max_length")

# Encode the input data
scierc_dataset_finetuning = scierc_dataset.map(encode_batch_finetuning, batched=True)
# The transformers model expects the target class column to be named "labels"
scierc_dataset_finetuning = scierc_dataset_finetuning.rename_column("label", 'labels')
# Transform to pytorch tensors and only output the required columns
scierc_dataset_finetuning.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])

Loading cached processed dataset at C:\Users\The Doctor\.cache\huggingface\datasets\json\nsusemiehl--SciERC-f57c64a52b9c80c0\0.0.0\ac0ca5f5289a6cf108e706efcf040422dbbfa8e658dee6a819f20d76bb84d26b\cache-6c719cda162c2a70.arrow
Loading cached processed dataset at C:\Users\The Doctor\.cache\huggingface\datasets\json\nsusemiehl--SciERC-f57c64a52b9c80c0\0.0.0\ac0ca5f5289a6cf108e706efcf040422dbbfa8e658dee6a819f20d76bb84d26b\cache-a2e89e74a8a70442.arrow
Loading cached processed dataset at C:\Users\The Doctor\.cache\huggingface\datasets\json\nsusemiehl--SciERC-f57c64a52b9c80c0\0.0.0\ac0ca5f5289a6cf108e706efcf040422dbbfa8e658dee6a819f20d76bb84d26b\cache-1e077601566683c7.arrow


# Model Creation

In [8]:
from transformers import RobertaConfig
from transformers import RobertaAdapterModel

def model_init(adapter_name = 'default_adapter', 
               num_lables = 0, 
               pretraining = False,
               load_adapter = False,
               adapter_dir = 'path'):
    """Creates a new roBERTa model with the given name for its adapter.

    Args:
        adapter_name (str): The name of the adapter to load/create. Defaults to 'default_adapter'.
        num_lables (int, optional): The number of labels for classification task. Defaults to 0.
        pretraining (bool, optional): Whether to create a model for pretraining or classification. Defaults to False.
        load_adapter (bool, optional): Whether to load an adapter with the adapter_name given or create a new one. Defaults to False.
        adapter_dir (str, optional): Directory to load the adapter. If load_adapter you need to specify this.  Defaults to 'path'.

    Returns:
        RobertaAdapterModel: A roBERTA model with an adapter added to it.
    """
    
    if pretraining:
        config = RobertaConfig.from_pretrained(
            "roberta-base",
            # num_labels=num_of_labels,*-8536.22.03
        )
        model = RobertaAdapterModel.from_pretrained(
            "roberta-base",
            config=config,
        )
        if load_adapter:
            # Add new adapter
            model.load_adapter(adapter_dir)

        else:
            # Add new adapter
            model.add_adapter(adapter_name)
            
        # Add a matching classification head
        model.add_masked_lm_head(adapter_name)
            
    else:
        config = RobertaConfig.from_pretrained(
            "roberta-base",
            num_labels=num_lables,
        )
        model = RobertaAdapterModel.from_pretrained(
            "roberta-base",
            config=config,
        )
        
        if load_adapter:
            # Add new adapter
            model.load_adapter(adapter_dir)

        else:
            # Add new adapter
            model.add_adapter(adapter_name)
            
        # Add a matching classification head
        model.add_classification_head(
                adapter_name,
                num_labels=num_lables,
                id2label={0:'COMPARE', 1:'CONJUNCTION', 2:'EVALUATE-FOR', 
                        3:'FEATURE-OF', 4:'HYPONYM-OF', 5:'PART-OF', 6:'USED-FOR'},
                overwrite_ok = True)
            
    # Activate the adapter
    model.train_adapter(adapter_name)    
     
    return model

Pretraining Block

In [25]:
from transformers import TrainingArguments, AdapterTrainer
from datasets import load_metric
from torch.utils.tensorboard import SummaryWriter
from transformers.integrations import TensorBoardCallback

import json

def pretraining_loop(num_models, training_args, dataset, 
                     data_collator, adapter_name, 
                    #  DAPT_n_TAPT, TAPT_dataset
                     ):
    """The Loop for running num_models number of models to determine run to run variance. Will run the model 
        and evaluate.

    Args:
        num_models (int): Number of models to loop through
        training_args (transformers.TrainingArguments): The arguments to pass to the trainer
        dataset (dataset): The dataset to train on
        data_collator (data_collator): The data collator for the trainer to use
        adapter_name (str): Name of the adapter to create
    """

    for i in range(num_models):
        adapter = f"{adapter_name}_{i}"
        model = model_init(adapter_name = adapter, pretraining=True)
        
        writer = SummaryWriter(log_dir= f'runs/{adapter}')
        writer = TensorBoardCallback(writer)

        trainer = AdapterTrainer(
            model=model,
            args=training_args,
            train_dataset=dataset["train"],
            eval_dataset=dataset["validation"],
            data_collator=data_collator,  
            callbacks=[writer] 
        )
        
        trainer.train()
        
        f = open(f"{training_args.output_dir}/evaulations.txt", "a")
        f.write(adapter)
        f.write(json.dumps(trainer.evaluate(dataset['test'])))
        f.write('\n')
        f.close()
        
        model.save_all_adapters(training_args.output_dir, with_head=False)
        # model.save_pretrained(f"{adapter_name}")
        
        # if DAPT_n_TAPT:
        #     trainer = AdapterTrainer(
        #         model=model,
        #         args=training_args,
        #         train_dataset=TAPT_dataset["train"],
        #         eval_dataset=TAPT_dataset["validation"],
        #         data_collator=data_collator,  
        #         callbacks=[writer] 
        #     )
            
        #     trainer.train()
        
        #     f = open("DAPT_TAPT_evaulations.txt", "a")
        #     f.write(adapter_name)
        #     f.write(trainer.evaluate(TAPT_dataset['test']))
        #     f.write('\n')
        #     f.close()
            
        #     model.save_pretrained(f"{adapter_name}_DAPT_TAPT")

DAPT Training

In [10]:
training_args = TrainingArguments(
    learning_rate=5e-4,
    num_train_epochs=1,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
    logging_steps=100,
    output_dir="./training_output/pretraining/DAPT",
    overwrite_output_dir=True,
    # The next line is important to ensure the dataset labels are properly passed to the model
    remove_unused_columns=True,
    evaluation_strategy = 'steps',
    # load_best_model_at_end = True,
    save_steps = 100,
    gradient_accumulation_steps = 64,
    warmup_ratio = 0.06,
    weight_decay=0.01,
    adam_epsilon = 1e-6,
)

In [None]:
pretraining_loop(num_models = 5, 
                 training_args = training_args, 
                #  dataset = DAPT_dataset, TODO: Need to add DAPT training set
                 data_collator = data_collator, 
                 adapter_name = "DAPT_sci-erc")

DAPT+TAPT Training

TAPT Training

In [10]:
training_args = TrainingArguments(
    learning_rate=0.0001,
    num_train_epochs=100,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
    logging_steps=10,
    output_dir="./training_output/pretraining/TAPT",
    overwrite_output_dir=True,
    # The next line is important to ensure the dataset labels are properly passed to the model
    remove_unused_columns=True,
    evaluation_strategy = 'steps',
    # load_best_model_at_end = True,
    save_steps = 100,
    gradient_accumulation_steps = 8,
    warmup_ratio = 0.06,
    # load_best_model_at_end = True,
    weight_decay=0.01,
    adam_epsilon = 1e-6,
)

In [11]:
pretraining_loop(num_models = 1, 
                 training_args = training_args, 
                 dataset = scierc_dataset_pretraining, 
                 data_collator = data_collator, 
                 adapter_name = "TAPT_sci-erc")

 98%|█████████▊| 1180/1200 [1:08:51<01:07,  3.37s/it]***** Running Evaluation *****
  Num examples = 455
  Batch size = 32


{'loss': 5.2076, 'learning_rate': 1.7730496453900712e-06, 'epoch': 98.32}


                                                     
 98%|█████████▊| 1180/1200 [1:08:54<01:07,  3.37s/it]

{'eval_loss': 5.05110502243042, 'eval_runtime': 2.7725, 'eval_samples_per_second': 164.111, 'eval_steps_per_second': 5.41, 'epoch': 98.32}


 99%|█████████▉| 1190/1200 [1:09:27<00:35,  3.56s/it]***** Running Evaluation *****
  Num examples = 455
  Batch size = 32


{'loss': 5.221, 'learning_rate': 8.865248226950356e-07, 'epoch': 99.16}


                                                     
 99%|█████████▉| 1190/1200 [1:09:30<00:35,  3.56s/it]

{'eval_loss': 5.058102130889893, 'eval_runtime': 2.7155, 'eval_samples_per_second': 167.559, 'eval_steps_per_second': 5.524, 'epoch': 99.16}


100%|██████████| 1200/1200 [1:10:01<00:00,  3.20s/it]***** Running Evaluation *****
  Num examples = 455
  Batch size = 32


{'loss': 4.9237, 'learning_rate': 0.0, 'epoch': 99.95}


                                                     
100%|██████████| 1200/1200 [1:10:04<00:00,  3.20s/it]Saving model checkpoint to ./training_output/pretraining/TAPT\checkpoint-1200
Configuration saved in ./training_output/pretraining/TAPT\checkpoint-1200\TAPT_sci-erc_0\adapter_config.json
Module weights saved in ./training_output/pretraining/TAPT\checkpoint-1200\TAPT_sci-erc_0\pytorch_adapter.bin
Configuration saved in ./training_output/pretraining/TAPT\checkpoint-1200\TAPT_sci-erc_0\head_config.json


{'eval_loss': 5.0362467765808105, 'eval_runtime': 2.6884, 'eval_samples_per_second': 169.243, 'eval_steps_per_second': 5.579, 'epoch': 99.95}


Module weights saved in ./training_output/pretraining/TAPT\checkpoint-1200\TAPT_sci-erc_0\pytorch_model_head.bin
Configuration saved in ./training_output/pretraining/TAPT\checkpoint-1200\TAPT_sci-erc_0\head_config.json
Module weights saved in ./training_output/pretraining/TAPT\checkpoint-1200\TAPT_sci-erc_0\pytorch_model_head.bin
Configuration saved in ./training_output/pretraining/TAPT\checkpoint-1200\TAPT_sci-erc_0\head_config.json
Module weights saved in ./training_output/pretraining/TAPT\checkpoint-1200\TAPT_sci-erc_0\pytorch_model_head.bin


Training completed. Do not forget to share your model on huggingface.co/models =)


100%|██████████| 1200/1200 [1:10:05<00:00,  3.50s/it]
***** Running Evaluation *****
  Num examples = 974
  Batch size = 32


{'train_runtime': 4205.2974, 'train_samples_per_second': 76.546, 'train_steps_per_second': 0.285, 'train_loss': 6.185284856160481, 'epoch': 99.95}


100%|██████████| 31/31 [00:05<00:00,  5.48it/s]
Configuration saved in ./training_output/pretraining/TAPT\TAPT_sci-erc_0\adapter_config.json
Module weights saved in ./training_output/pretraining/TAPT\TAPT_sci-erc_0\pytorch_adapter.bin
Configuration saved in ./training_output/pretraining/TAPT\TAPT_sci-erc_0\head_config.json
Module weights saved in ./training_output/pretraining/TAPT\TAPT_sci-erc_0\pytorch_model_head.bin


Fine Tuning Models

In [12]:
from datasets import load_metric
metric = load_metric('f1')

def compute_metric(EvalPrediction):
  
  logits, labels = EvalPrediction
  predictions = np.argmax(logits, axis=-1)
  return metric.compute(predictions=predictions, references=labels, average= 'macro')

In [13]:
def finetuning_loop(num_models, training_args, dataset, adapter_name, num_labels, load_adapter = False, adapter_dir = 'Path'):

    for i in range(num_models):
        adapter = f"{adapter_name}_{i}"
        model = model_init(adapter_name = adapter, num_lables = num_labels, pretraining=False, load_adapter = load_adapter, adapter_dir = f"{adapter_dir}/{adapter}")
        
        writer = SummaryWriter(log_dir= f'runs/{adapter}')
        writer = TensorBoardCallback(writer)

        trainer = AdapterTrainer(
            model=model,
            args=training_args,
            train_dataset=dataset["train"],
            eval_dataset=dataset["validation"],
            callbacks=[writer],
            compute_metrics = compute_metric 
        )
        
        trainer.train()
        
        f = open(f"{training_args.output_dir}/evaulations.txt", "a")
        f.write(adapter)
        f.write(json.dumps(trainer.evaluate(dataset['test'])))
        f.write('\n')
        f.close()
        
        # model.save_pretrained(f"{adapter_name}")
        model.save_all_adapters(training_args.output_dir)
        
        trainer.remove_callback(writer)

DAPT Finetuning

In [12]:
training_args = TrainingArguments(
    learning_rate=1e-4,
    num_train_epochs=50,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
    logging_steps=100,
    output_dir="./training_output/finetuning/DAPT",
    overwrite_output_dir=True,
    # The next line is important to ensure the dataset labels are properly passed to the model
    remove_unused_columns=False,
    evaluation_strategy = 'epoch',
    # load_best_model_at_end = True,
    save_steps = 100
)

In [None]:
finetuning_loop(num_models = 5, 
                 training_args = training_args, 
                 dataset = scierc_dataset_finetuning,  
                 adapter_name = "DAPT_sci-erc",
                 load_adapter = True)

DAPT+TAPT Finetuning

In [None]:
training_args = TrainingArguments(
    learning_rate=1e-4,
    num_train_epochs=50,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
    logging_steps=10,
    output_dir="./training_output/finetuning/DAPT_TAPT",
    overwrite_output_dir=True,
    # The next line is important to ensure the dataset labels are properly passed to the model
    remove_unused_columns=False,
    evaluation_strategy = 'epoch',
    # load_best_model_at_end = True,
    save_steps = 100
)

In [None]:
finetuning_loop(num_models = 5, 
                 training_args = training_args, 
                 dataset = scierc_dataset_finetuning,  
                 adapter_name = "DAPT_TAPT_sci-erc",
                 load_adapter = True,
                 adapter_dir = "./training_output/pretraining/DAPT_TAPT",
                 num_labels = num_of_labels)

TAPT Finetuning

In [23]:
training_args = TrainingArguments(
    learning_rate=2e-5,
    num_train_epochs=50,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    logging_steps=100,
    output_dir="./training_output/finetuning/TAPT",
    overwrite_output_dir=True,
    # The next line is important to ensure the dataset labels are properly passed to the model
    remove_unused_columns=False,
    evaluation_strategy = 'epoch',
    # load_best_model_at_end = True,
    save_steps = 100,
    lr_scheduler_type = 'constant',
    log_level  = 'error'
    
)

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).


In [24]:
finetuning_loop(num_models = 1, 
                 training_args = training_args, 
                 dataset = scierc_dataset_finetuning,  
                 adapter_name = "TAPT_sci-erc",
                 load_adapter = True,
                 adapter_dir = "./training_output/pretraining/TAPT",
                 num_labels = num_of_labels)

 73%|███████▎  | 7400/10100 [13:56<04:32,  9.90it/s]Saving model checkpoint to ./training_output/finetuning/TAPT\checkpoint-7400
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-7400\TAPT_sci-erc_0\adapter_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-7400\TAPT_sci-erc_0\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-7400\TAPT_sci-erc_0\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-7400\TAPT_sci-erc_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-7400\TAPT_sci-erc_0\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-7400\TAPT_sci-erc_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-7400\TAPT_sci-erc_0\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-7400\TAPT_sci-erc_0\pytorch_model_head.bin
 73

{'loss': 0.3206, 'learning_rate': 2e-05, 'epoch': 36.63}


 74%|███████▎  | 7426/10100 [13:58<04:35,  9.71it/s]

{'loss': 0.2375, 'learning_rate': 2e-05, 'epoch': 36.76}


 74%|███████▍  | 7452/10100 [14:01<04:30,  9.80it/s]

{'loss': 0.2776, 'learning_rate': 2e-05, 'epoch': 36.88}


 74%|███████▍  | 7473/10100 [14:03<04:23,  9.97it/s]***** Running Evaluation *****
  Num examples = 455
  Batch size = 16
                                                    
 74%|███████▍  | 7475/10100 [14:05<18:52,  2.32it/s]

{'eval_loss': 0.5524800419807434, 'eval_f1': 0.7594198593368782, 'eval_runtime': 1.4823, 'eval_samples_per_second': 306.946, 'eval_steps_per_second': 19.564, 'epoch': 37.0}
{'loss': 0.2088, 'learning_rate': 2e-05, 'epoch': 37.0}


 74%|███████▍  | 7500/10100 [14:07<04:36,  9.41it/s]Saving model checkpoint to ./training_output/finetuning/TAPT\checkpoint-7500
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-7500\TAPT_sci-erc_0\adapter_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-7500\TAPT_sci-erc_0\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-7500\TAPT_sci-erc_0\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-7500\TAPT_sci-erc_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-7500\TAPT_sci-erc_0\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-7500\TAPT_sci-erc_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-7500\TAPT_sci-erc_0\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-7500\TAPT_sci-erc_0\pytorch_model_head.bin
 74

{'loss': 0.227, 'learning_rate': 2e-05, 'epoch': 37.13}


 75%|███████▍  | 7526/10100 [14:10<04:36,  9.30it/s]

{'loss': 0.2796, 'learning_rate': 2e-05, 'epoch': 37.25}


 75%|███████▍  | 7551/10100 [14:13<04:37,  9.17it/s]

{'loss': 0.2343, 'learning_rate': 2e-05, 'epoch': 37.38}


 75%|███████▌  | 7576/10100 [14:15<04:23,  9.59it/s]

{'loss': 0.2014, 'learning_rate': 2e-05, 'epoch': 37.5}


 75%|███████▌  | 7600/10100 [14:18<04:11,  9.94it/s]Saving model checkpoint to ./training_output/finetuning/TAPT\checkpoint-7600
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-7600\TAPT_sci-erc_0\adapter_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-7600\TAPT_sci-erc_0\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-7600\TAPT_sci-erc_0\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-7600\TAPT_sci-erc_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-7600\TAPT_sci-erc_0\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-7600\TAPT_sci-erc_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-7600\TAPT_sci-erc_0\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-7600\TAPT_sci-erc_0\pytorch_model_head.bin
 75

{'loss': 0.2863, 'learning_rate': 2e-05, 'epoch': 37.62}


 76%|███████▌  | 7626/10100 [14:21<04:24,  9.35it/s]

{'loss': 0.1951, 'learning_rate': 2e-05, 'epoch': 37.75}


 76%|███████▌  | 7651/10100 [14:23<04:26,  9.19it/s]

{'loss': 0.22, 'learning_rate': 2e-05, 'epoch': 37.87}


 76%|███████▌  | 7675/10100 [14:26<04:21,  9.26it/s]***** Running Evaluation *****
  Num examples = 455
  Batch size = 16


{'loss': 0.2345, 'learning_rate': 2e-05, 'epoch': 38.0}


                                                    
 76%|███████▌  | 7677/10100 [14:28<18:20,  2.20it/s]

{'eval_loss': 0.45766589045524597, 'eval_f1': 0.807259337132667, 'eval_runtime': 1.5574, 'eval_samples_per_second': 292.151, 'eval_steps_per_second': 18.621, 'epoch': 38.0}


 76%|███████▌  | 7700/10100 [14:30<04:18,  9.29it/s]Saving model checkpoint to ./training_output/finetuning/TAPT\checkpoint-7700
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-7700\TAPT_sci-erc_0\adapter_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-7700\TAPT_sci-erc_0\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-7700\TAPT_sci-erc_0\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-7700\TAPT_sci-erc_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-7700\TAPT_sci-erc_0\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-7700\TAPT_sci-erc_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-7700\TAPT_sci-erc_0\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-7700\TAPT_sci-erc_0\pytorch_model_head.bin
 76

{'loss': 0.2734, 'learning_rate': 2e-05, 'epoch': 38.12}


 76%|███████▋  | 7726/10100 [14:33<04:17,  9.21it/s]

{'loss': 0.2015, 'learning_rate': 2e-05, 'epoch': 38.24}


 77%|███████▋  | 7751/10100 [14:36<04:17,  9.13it/s]

{'loss': 0.1968, 'learning_rate': 2e-05, 'epoch': 38.37}


 77%|███████▋  | 7776/10100 [14:38<04:14,  9.12it/s]

{'loss': 0.2004, 'learning_rate': 2e-05, 'epoch': 38.49}


 77%|███████▋  | 7800/10100 [14:41<04:08,  9.26it/s]Saving model checkpoint to ./training_output/finetuning/TAPT\checkpoint-7800
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-7800\TAPT_sci-erc_0\adapter_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-7800\TAPT_sci-erc_0\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-7800\TAPT_sci-erc_0\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-7800\TAPT_sci-erc_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-7800\TAPT_sci-erc_0\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-7800\TAPT_sci-erc_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-7800\TAPT_sci-erc_0\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-7800\TAPT_sci-erc_0\pytorch_model_head.bin
 77

{'loss': 0.2257, 'learning_rate': 2e-05, 'epoch': 38.61}


 77%|███████▋  | 7826/10100 [14:44<04:12,  9.00it/s]

{'loss': 0.2022, 'learning_rate': 2e-05, 'epoch': 38.74}


 78%|███████▊  | 7851/10100 [14:47<04:21,  8.60it/s]

{'loss': 0.2568, 'learning_rate': 2e-05, 'epoch': 38.86}


 78%|███████▊  | 7876/10100 [14:50<04:14,  8.74it/s]

{'loss': 0.2464, 'learning_rate': 2e-05, 'epoch': 38.99}


 78%|███████▊  | 7877/10100 [14:50<04:10,  8.89it/s]***** Running Evaluation *****
  Num examples = 455
  Batch size = 16
                                                    
 78%|███████▊  | 7879/10100 [14:51<17:07,  2.16it/s]

{'eval_loss': 0.47187650203704834, 'eval_f1': 0.8158251532007458, 'eval_runtime': 1.5854, 'eval_samples_per_second': 286.986, 'eval_steps_per_second': 18.291, 'epoch': 39.0}


 78%|███████▊  | 7900/10100 [14:54<03:57,  9.27it/s]Saving model checkpoint to ./training_output/finetuning/TAPT\checkpoint-7900
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-7900\TAPT_sci-erc_0\adapter_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-7900\TAPT_sci-erc_0\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-7900\TAPT_sci-erc_0\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-7900\TAPT_sci-erc_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-7900\TAPT_sci-erc_0\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-7900\TAPT_sci-erc_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-7900\TAPT_sci-erc_0\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-7900\TAPT_sci-erc_0\pytorch_model_head.bin
 78

{'loss': 0.2005, 'learning_rate': 2e-05, 'epoch': 39.11}


 78%|███████▊  | 7926/10100 [14:57<03:59,  9.07it/s]

{'loss': 0.1754, 'learning_rate': 2e-05, 'epoch': 39.23}


 79%|███████▊  | 7951/10100 [14:59<03:53,  9.20it/s]

{'loss': 0.2119, 'learning_rate': 2e-05, 'epoch': 39.36}


 79%|███████▉  | 7976/10100 [15:02<03:55,  9.02it/s]

{'loss': 0.2584, 'learning_rate': 2e-05, 'epoch': 39.48}


 79%|███████▉  | 8000/10100 [15:05<03:46,  9.28it/s]Saving model checkpoint to ./training_output/finetuning/TAPT\checkpoint-8000
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-8000\TAPT_sci-erc_0\adapter_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-8000\TAPT_sci-erc_0\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-8000\TAPT_sci-erc_0\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-8000\TAPT_sci-erc_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-8000\TAPT_sci-erc_0\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-8000\TAPT_sci-erc_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-8000\TAPT_sci-erc_0\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-8000\TAPT_sci-erc_0\pytorch_model_head.bin
 79

{'loss': 0.2469, 'learning_rate': 2e-05, 'epoch': 39.6}


 79%|███████▉  | 8026/10100 [15:08<03:47,  9.12it/s]

{'loss': 0.29, 'learning_rate': 2e-05, 'epoch': 39.73}


 80%|███████▉  | 8051/10100 [15:10<03:43,  9.18it/s]

{'loss': 0.2206, 'learning_rate': 2e-05, 'epoch': 39.85}


 80%|███████▉  | 8076/10100 [15:13<03:41,  9.16it/s]

{'loss': 0.3293, 'learning_rate': 2e-05, 'epoch': 39.98}


 80%|███████▉  | 8079/10100 [15:13<03:40,  9.16it/s]***** Running Evaluation *****
  Num examples = 455
  Batch size = 16
                                                    
 80%|████████  | 8081/10100 [15:15<15:36,  2.16it/s]

{'eval_loss': 0.4250684082508087, 'eval_f1': 0.794255993814634, 'eval_runtime': 1.5985, 'eval_samples_per_second': 284.65, 'eval_steps_per_second': 18.143, 'epoch': 40.0}


 80%|████████  | 8100/10100 [15:17<03:56,  8.46it/s]Saving model checkpoint to ./training_output/finetuning/TAPT\checkpoint-8100
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-8100\TAPT_sci-erc_0\adapter_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-8100\TAPT_sci-erc_0\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-8100\TAPT_sci-erc_0\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-8100\TAPT_sci-erc_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-8100\TAPT_sci-erc_0\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-8100\TAPT_sci-erc_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-8100\TAPT_sci-erc_0\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-8100\TAPT_sci-erc_0\pytorch_model_head.bin


{'loss': 0.1587, 'learning_rate': 2e-05, 'epoch': 40.1}


 80%|████████  | 8126/10100 [15:20<03:33,  9.23it/s]

{'loss': 0.184, 'learning_rate': 2e-05, 'epoch': 40.22}


 81%|████████  | 8151/10100 [15:23<03:34,  9.10it/s]

{'loss': 0.2016, 'learning_rate': 2e-05, 'epoch': 40.35}


 81%|████████  | 8176/10100 [15:26<03:28,  9.21it/s]

{'loss': 0.1989, 'learning_rate': 2e-05, 'epoch': 40.47}


 81%|████████  | 8200/10100 [15:28<03:24,  9.30it/s]Saving model checkpoint to ./training_output/finetuning/TAPT\checkpoint-8200
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-8200\TAPT_sci-erc_0\adapter_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-8200\TAPT_sci-erc_0\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-8200\TAPT_sci-erc_0\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-8200\TAPT_sci-erc_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-8200\TAPT_sci-erc_0\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-8200\TAPT_sci-erc_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-8200\TAPT_sci-erc_0\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-8200\TAPT_sci-erc_0\pytorch_model_head.bin
 81

{'loss': 0.2291, 'learning_rate': 2e-05, 'epoch': 40.59}


 81%|████████▏ | 8226/10100 [15:31<03:29,  8.94it/s]

{'loss': 0.1948, 'learning_rate': 2e-05, 'epoch': 40.72}


 82%|████████▏ | 8251/10100 [15:34<03:22,  9.13it/s]

{'loss': 0.2377, 'learning_rate': 2e-05, 'epoch': 40.84}


 82%|████████▏ | 8276/10100 [15:36<03:14,  9.37it/s]

{'loss': 0.2656, 'learning_rate': 2e-05, 'epoch': 40.97}


 82%|████████▏ | 8281/10100 [15:37<03:12,  9.43it/s]***** Running Evaluation *****
  Num examples = 455
  Batch size = 16
                                                    
 82%|████████▏ | 8283/10100 [15:39<13:27,  2.25it/s]

{'eval_loss': 0.46412673592567444, 'eval_f1': 0.8202669390250374, 'eval_runtime': 1.5264, 'eval_samples_per_second': 298.089, 'eval_steps_per_second': 18.999, 'epoch': 41.0}


 82%|████████▏ | 8300/10100 [15:40<03:14,  9.27it/s]Saving model checkpoint to ./training_output/finetuning/TAPT\checkpoint-8300
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-8300\TAPT_sci-erc_0\adapter_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-8300\TAPT_sci-erc_0\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-8300\TAPT_sci-erc_0\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-8300\TAPT_sci-erc_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-8300\TAPT_sci-erc_0\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-8300\TAPT_sci-erc_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-8300\TAPT_sci-erc_0\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-8300\TAPT_sci-erc_0\pytorch_model_head.bin
 82

{'loss': 0.2833, 'learning_rate': 2e-05, 'epoch': 41.09}


 82%|████████▏ | 8326/10100 [15:43<03:11,  9.28it/s]

{'loss': 0.1625, 'learning_rate': 2e-05, 'epoch': 41.21}


 83%|████████▎ | 8351/10100 [15:46<03:09,  9.22it/s]

{'loss': 0.217, 'learning_rate': 2e-05, 'epoch': 41.34}


 83%|████████▎ | 8376/10100 [15:49<03:03,  9.37it/s]

{'loss': 0.1454, 'learning_rate': 2e-05, 'epoch': 41.46}


 83%|████████▎ | 8400/10100 [15:51<03:02,  9.33it/s]Saving model checkpoint to ./training_output/finetuning/TAPT\checkpoint-8400
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-8400\TAPT_sci-erc_0\adapter_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-8400\TAPT_sci-erc_0\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-8400\TAPT_sci-erc_0\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-8400\TAPT_sci-erc_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-8400\TAPT_sci-erc_0\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-8400\TAPT_sci-erc_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-8400\TAPT_sci-erc_0\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-8400\TAPT_sci-erc_0\pytorch_model_head.bin
 83

{'loss': 0.2788, 'learning_rate': 2e-05, 'epoch': 41.58}


 83%|████████▎ | 8426/10100 [15:54<02:58,  9.39it/s]

{'loss': 0.2171, 'learning_rate': 2e-05, 'epoch': 41.71}


 84%|████████▎ | 8451/10100 [15:57<02:56,  9.33it/s]

{'loss': 0.2611, 'learning_rate': 2e-05, 'epoch': 41.83}


 84%|████████▍ | 8476/10100 [15:59<02:55,  9.26it/s]

{'loss': 0.2199, 'learning_rate': 2e-05, 'epoch': 41.96}


 84%|████████▍ | 8483/10100 [16:00<02:52,  9.39it/s]***** Running Evaluation *****
  Num examples = 455
  Batch size = 16
                                                    
 84%|████████▍ | 8485/10100 [16:02<11:56,  2.25it/s]

{'eval_loss': 0.44670742750167847, 'eval_f1': 0.8049900510035624, 'eval_runtime': 1.5234, 'eval_samples_per_second': 298.677, 'eval_steps_per_second': 19.037, 'epoch': 42.0}


 84%|████████▍ | 8500/10100 [16:03<02:52,  9.30it/s]Saving model checkpoint to ./training_output/finetuning/TAPT\checkpoint-8500
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-8500\TAPT_sci-erc_0\adapter_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-8500\TAPT_sci-erc_0\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-8500\TAPT_sci-erc_0\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-8500\TAPT_sci-erc_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-8500\TAPT_sci-erc_0\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-8500\TAPT_sci-erc_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-8500\TAPT_sci-erc_0\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-8500\TAPT_sci-erc_0\pytorch_model_head.bin
 84

{'loss': 0.2161, 'learning_rate': 2e-05, 'epoch': 42.08}


 84%|████████▍ | 8526/10100 [16:06<02:48,  9.32it/s]

{'loss': 0.2055, 'learning_rate': 2e-05, 'epoch': 42.2}


 85%|████████▍ | 8551/10100 [16:09<02:45,  9.38it/s]

{'loss': 0.182, 'learning_rate': 2e-05, 'epoch': 42.33}


 85%|████████▍ | 8576/10100 [16:11<02:43,  9.32it/s]

{'loss': 0.2121, 'learning_rate': 2e-05, 'epoch': 42.45}


 85%|████████▌ | 8600/10100 [16:14<02:39,  9.40it/s]Saving model checkpoint to ./training_output/finetuning/TAPT\checkpoint-8600
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-8600\TAPT_sci-erc_0\adapter_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-8600\TAPT_sci-erc_0\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-8600\TAPT_sci-erc_0\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-8600\TAPT_sci-erc_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-8600\TAPT_sci-erc_0\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-8600\TAPT_sci-erc_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-8600\TAPT_sci-erc_0\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-8600\TAPT_sci-erc_0\pytorch_model_head.bin
 85

{'loss': 0.1728, 'learning_rate': 2e-05, 'epoch': 42.57}


 85%|████████▌ | 8626/10100 [16:17<02:37,  9.37it/s]

{'loss': 0.1195, 'learning_rate': 2e-05, 'epoch': 42.7}


 86%|████████▌ | 8651/10100 [16:19<02:37,  9.20it/s]

{'loss': 0.216, 'learning_rate': 2e-05, 'epoch': 42.82}


 86%|████████▌ | 8676/10100 [16:22<02:34,  9.22it/s]

{'loss': 0.2188, 'learning_rate': 2e-05, 'epoch': 42.95}


 86%|████████▌ | 8685/10100 [16:23<02:32,  9.31it/s]***** Running Evaluation *****
  Num examples = 455
  Batch size = 16
                                                    
 86%|████████▌ | 8687/10100 [16:25<10:38,  2.21it/s]

{'eval_loss': 0.44976162910461426, 'eval_f1': 0.8110861079664099, 'eval_runtime': 1.5544, 'eval_samples_per_second': 292.715, 'eval_steps_per_second': 18.657, 'epoch': 43.0}


 86%|████████▌ | 8700/10100 [16:26<02:38,  8.85it/s]Saving model checkpoint to ./training_output/finetuning/TAPT\checkpoint-8700
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-8700\TAPT_sci-erc_0\adapter_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-8700\TAPT_sci-erc_0\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-8700\TAPT_sci-erc_0\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-8700\TAPT_sci-erc_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-8700\TAPT_sci-erc_0\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-8700\TAPT_sci-erc_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-8700\TAPT_sci-erc_0\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-8700\TAPT_sci-erc_0\pytorch_model_head.bin
 86

{'loss': 0.21, 'learning_rate': 2e-05, 'epoch': 43.07}


 86%|████████▋ | 8726/10100 [16:29<02:29,  9.21it/s]

{'loss': 0.2324, 'learning_rate': 2e-05, 'epoch': 43.19}


 87%|████████▋ | 8751/10100 [16:32<02:29,  9.02it/s]

{'loss': 0.1523, 'learning_rate': 2e-05, 'epoch': 43.32}


 87%|████████▋ | 8776/10100 [16:35<02:26,  9.05it/s]

{'loss': 0.1766, 'learning_rate': 2e-05, 'epoch': 43.44}


 87%|████████▋ | 8800/10100 [16:37<02:21,  9.18it/s]Saving model checkpoint to ./training_output/finetuning/TAPT\checkpoint-8800
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-8800\TAPT_sci-erc_0\adapter_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-8800\TAPT_sci-erc_0\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-8800\TAPT_sci-erc_0\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-8800\TAPT_sci-erc_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-8800\TAPT_sci-erc_0\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-8800\TAPT_sci-erc_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-8800\TAPT_sci-erc_0\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-8800\TAPT_sci-erc_0\pytorch_model_head.bin
 87

{'loss': 0.1856, 'learning_rate': 2e-05, 'epoch': 43.56}


 87%|████████▋ | 8826/10100 [16:40<02:17,  9.25it/s]

{'loss': 0.2266, 'learning_rate': 2e-05, 'epoch': 43.69}


 88%|████████▊ | 8851/10100 [16:43<02:13,  9.35it/s]

{'loss': 0.2434, 'learning_rate': 2e-05, 'epoch': 43.81}


 88%|████████▊ | 8876/10100 [16:45<02:13,  9.18it/s]

{'loss': 0.1767, 'learning_rate': 2e-05, 'epoch': 43.94}


 88%|████████▊ | 8887/10100 [16:46<02:10,  9.31it/s]***** Running Evaluation *****
  Num examples = 455
  Batch size = 16
                                                    
 88%|████████▊ | 8889/10100 [16:48<09:01,  2.24it/s]

{'eval_loss': 0.5068959593772888, 'eval_f1': 0.8230098116369308, 'eval_runtime': 1.5354, 'eval_samples_per_second': 296.34, 'eval_steps_per_second': 18.888, 'epoch': 44.0}


 88%|████████▊ | 8900/10100 [16:49<02:17,  8.74it/s]Saving model checkpoint to ./training_output/finetuning/TAPT\checkpoint-8900
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-8900\TAPT_sci-erc_0\adapter_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-8900\TAPT_sci-erc_0\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-8900\TAPT_sci-erc_0\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-8900\TAPT_sci-erc_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-8900\TAPT_sci-erc_0\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-8900\TAPT_sci-erc_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-8900\TAPT_sci-erc_0\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-8900\TAPT_sci-erc_0\pytorch_model_head.bin
 88

{'loss': 0.208, 'learning_rate': 2e-05, 'epoch': 44.06}


 88%|████████▊ | 8926/10100 [16:52<02:06,  9.27it/s]

{'loss': 0.2388, 'learning_rate': 2e-05, 'epoch': 44.18}


 89%|████████▊ | 8951/10100 [16:55<02:03,  9.32it/s]

{'loss': 0.178, 'learning_rate': 2e-05, 'epoch': 44.31}


 89%|████████▉ | 8976/10100 [16:57<02:00,  9.31it/s]

{'loss': 0.1762, 'learning_rate': 2e-05, 'epoch': 44.43}


 89%|████████▉ | 9000/10100 [17:00<01:56,  9.43it/s]Saving model checkpoint to ./training_output/finetuning/TAPT\checkpoint-9000
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-9000\TAPT_sci-erc_0\adapter_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-9000\TAPT_sci-erc_0\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-9000\TAPT_sci-erc_0\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-9000\TAPT_sci-erc_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-9000\TAPT_sci-erc_0\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-9000\TAPT_sci-erc_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-9000\TAPT_sci-erc_0\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-9000\TAPT_sci-erc_0\pytorch_model_head.bin
 89

{'loss': 0.2522, 'learning_rate': 2e-05, 'epoch': 44.55}


 89%|████████▉ | 9026/10100 [17:03<01:55,  9.29it/s]

{'loss': 0.1677, 'learning_rate': 2e-05, 'epoch': 44.68}


 90%|████████▉ | 9051/10100 [17:05<01:51,  9.38it/s]

{'loss': 0.2279, 'learning_rate': 2e-05, 'epoch': 44.8}


 90%|████████▉ | 9076/10100 [17:08<01:49,  9.37it/s]

{'loss': 0.2668, 'learning_rate': 2e-05, 'epoch': 44.93}


 90%|████████▉ | 9089/10100 [17:09<01:45,  9.54it/s]***** Running Evaluation *****
  Num examples = 455
  Batch size = 16
                                                    
 90%|█████████ | 9091/10100 [17:11<07:27,  2.25it/s]

{'eval_loss': 0.5605481863021851, 'eval_f1': 0.7889465844504617, 'eval_runtime': 1.5264, 'eval_samples_per_second': 298.089, 'eval_steps_per_second': 18.999, 'epoch': 45.0}


 90%|█████████ | 9100/10100 [17:12<02:02,  8.16it/s]Saving model checkpoint to ./training_output/finetuning/TAPT\checkpoint-9100
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-9100\TAPT_sci-erc_0\adapter_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-9100\TAPT_sci-erc_0\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-9100\TAPT_sci-erc_0\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-9100\TAPT_sci-erc_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-9100\TAPT_sci-erc_0\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-9100\TAPT_sci-erc_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-9100\TAPT_sci-erc_0\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-9100\TAPT_sci-erc_0\pytorch_model_head.bin
 90

{'loss': 0.2227, 'learning_rate': 2e-05, 'epoch': 45.05}


 90%|█████████ | 9126/10100 [17:15<01:44,  9.31it/s]

{'loss': 0.1843, 'learning_rate': 2e-05, 'epoch': 45.17}


 91%|█████████ | 9151/10100 [17:18<01:43,  9.19it/s]

{'loss': 0.1778, 'learning_rate': 2e-05, 'epoch': 45.3}


 91%|█████████ | 9176/10100 [17:20<01:39,  9.33it/s]

{'loss': 0.1943, 'learning_rate': 2e-05, 'epoch': 45.42}


 91%|█████████ | 9200/10100 [17:23<01:34,  9.51it/s]Saving model checkpoint to ./training_output/finetuning/TAPT\checkpoint-9200
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-9200\TAPT_sci-erc_0\adapter_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-9200\TAPT_sci-erc_0\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-9200\TAPT_sci-erc_0\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-9200\TAPT_sci-erc_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-9200\TAPT_sci-erc_0\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-9200\TAPT_sci-erc_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-9200\TAPT_sci-erc_0\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-9200\TAPT_sci-erc_0\pytorch_model_head.bin
 91

{'loss': 0.2371, 'learning_rate': 2e-05, 'epoch': 45.54}


 91%|█████████▏| 9226/10100 [17:26<01:33,  9.32it/s]

{'loss': 0.1814, 'learning_rate': 2e-05, 'epoch': 45.67}


 92%|█████████▏| 9251/10100 [17:28<01:31,  9.31it/s]

{'loss': 0.1907, 'learning_rate': 2e-05, 'epoch': 45.79}


 92%|█████████▏| 9276/10100 [17:31<01:28,  9.34it/s]

{'loss': 0.1847, 'learning_rate': 2e-05, 'epoch': 45.92}


 92%|█████████▏| 9291/10100 [17:33<01:25,  9.42it/s]***** Running Evaluation *****
  Num examples = 455
  Batch size = 16
                                                    
 92%|█████████▏| 9293/10100 [17:34<05:59,  2.25it/s]

{'eval_loss': 0.4775365889072418, 'eval_f1': 0.8255346557453428, 'eval_runtime': 1.5284, 'eval_samples_per_second': 297.699, 'eval_steps_per_second': 18.974, 'epoch': 46.0}


 92%|█████████▏| 9300/10100 [17:35<01:53,  7.05it/s]Saving model checkpoint to ./training_output/finetuning/TAPT\checkpoint-9300
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-9300\TAPT_sci-erc_0\adapter_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-9300\TAPT_sci-erc_0\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-9300\TAPT_sci-erc_0\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-9300\TAPT_sci-erc_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-9300\TAPT_sci-erc_0\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-9300\TAPT_sci-erc_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-9300\TAPT_sci-erc_0\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-9300\TAPT_sci-erc_0\pytorch_model_head.bin
 92

{'loss': 0.1534, 'learning_rate': 2e-05, 'epoch': 46.04}


 92%|█████████▏| 9326/10100 [17:38<01:22,  9.34it/s]

{'loss': 0.195, 'learning_rate': 2e-05, 'epoch': 46.16}


 93%|█████████▎| 9351/10100 [17:40<01:20,  9.32it/s]

{'loss': 0.2435, 'learning_rate': 2e-05, 'epoch': 46.29}


 93%|█████████▎| 9376/10100 [17:43<01:18,  9.24it/s]

{'loss': 0.2143, 'learning_rate': 2e-05, 'epoch': 46.41}


 93%|█████████▎| 9400/10100 [17:46<01:13,  9.53it/s]Saving model checkpoint to ./training_output/finetuning/TAPT\checkpoint-9400
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-9400\TAPT_sci-erc_0\adapter_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-9400\TAPT_sci-erc_0\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-9400\TAPT_sci-erc_0\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-9400\TAPT_sci-erc_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-9400\TAPT_sci-erc_0\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-9400\TAPT_sci-erc_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-9400\TAPT_sci-erc_0\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-9400\TAPT_sci-erc_0\pytorch_model_head.bin
 93

{'loss': 0.1066, 'learning_rate': 2e-05, 'epoch': 46.53}


 93%|█████████▎| 9426/10100 [17:48<01:12,  9.33it/s]

{'loss': 0.166, 'learning_rate': 2e-05, 'epoch': 46.66}


 94%|█████████▎| 9451/10100 [17:51<01:09,  9.39it/s]

{'loss': 0.1802, 'learning_rate': 2e-05, 'epoch': 46.78}


 94%|█████████▍| 9476/10100 [17:54<01:06,  9.41it/s]

{'loss': 0.1839, 'learning_rate': 2e-05, 'epoch': 46.91}


 94%|█████████▍| 9493/10100 [17:56<01:03,  9.49it/s]***** Running Evaluation *****
  Num examples = 455
  Batch size = 16
                                                    
 94%|█████████▍| 9495/10100 [17:57<04:26,  2.27it/s]

{'eval_loss': 0.5255916118621826, 'eval_f1': 0.8225592312935263, 'eval_runtime': 1.5164, 'eval_samples_per_second': 300.057, 'eval_steps_per_second': 19.125, 'epoch': 47.0}


 94%|█████████▍| 9500/10100 [17:58<01:45,  5.69it/s]Saving model checkpoint to ./training_output/finetuning/TAPT\checkpoint-9500
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-9500\TAPT_sci-erc_0\adapter_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-9500\TAPT_sci-erc_0\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-9500\TAPT_sci-erc_0\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-9500\TAPT_sci-erc_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-9500\TAPT_sci-erc_0\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-9500\TAPT_sci-erc_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-9500\TAPT_sci-erc_0\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-9500\TAPT_sci-erc_0\pytorch_model_head.bin
 94

{'loss': 0.2224, 'learning_rate': 2e-05, 'epoch': 47.03}


 94%|█████████▍| 9526/10100 [18:01<01:02,  9.24it/s]

{'loss': 0.1786, 'learning_rate': 2e-05, 'epoch': 47.15}


 95%|█████████▍| 9551/10100 [18:03<00:59,  9.18it/s]

{'loss': 0.16, 'learning_rate': 2e-05, 'epoch': 47.28}


 95%|█████████▍| 9576/10100 [18:06<00:56,  9.21it/s]

{'loss': 0.1923, 'learning_rate': 2e-05, 'epoch': 47.4}


 95%|█████████▌| 9600/10100 [18:09<00:54,  9.14it/s]Saving model checkpoint to ./training_output/finetuning/TAPT\checkpoint-9600
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-9600\TAPT_sci-erc_0\adapter_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-9600\TAPT_sci-erc_0\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-9600\TAPT_sci-erc_0\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-9600\TAPT_sci-erc_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-9600\TAPT_sci-erc_0\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-9600\TAPT_sci-erc_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-9600\TAPT_sci-erc_0\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-9600\TAPT_sci-erc_0\pytorch_model_head.bin
 95

{'loss': 0.2614, 'learning_rate': 2e-05, 'epoch': 47.52}


 95%|█████████▌| 9626/10100 [18:12<00:51,  9.26it/s]

{'loss': 0.1923, 'learning_rate': 2e-05, 'epoch': 47.65}


 96%|█████████▌| 9651/10100 [18:14<00:48,  9.26it/s]

{'loss': 0.1615, 'learning_rate': 2e-05, 'epoch': 47.77}


 96%|█████████▌| 9676/10100 [18:17<00:46,  9.16it/s]

{'loss': 0.1925, 'learning_rate': 2e-05, 'epoch': 47.9}


 96%|█████████▌| 9695/10100 [18:19<00:43,  9.21it/s]***** Running Evaluation *****
  Num examples = 455
  Batch size = 16
                                                    
 96%|█████████▌| 9697/10100 [18:21<03:04,  2.19it/s]

{'eval_loss': 0.4788797199726105, 'eval_f1': 0.8264477293573206, 'eval_runtime': 1.5704, 'eval_samples_per_second': 289.73, 'eval_steps_per_second': 18.466, 'epoch': 48.0}


 96%|█████████▌| 9700/10100 [18:21<01:39,  4.02it/s]Saving model checkpoint to ./training_output/finetuning/TAPT\checkpoint-9700
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-9700\TAPT_sci-erc_0\adapter_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-9700\TAPT_sci-erc_0\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-9700\TAPT_sci-erc_0\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-9700\TAPT_sci-erc_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-9700\TAPT_sci-erc_0\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-9700\TAPT_sci-erc_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-9700\TAPT_sci-erc_0\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-9700\TAPT_sci-erc_0\pytorch_model_head.bin
 96

{'loss': 0.1301, 'learning_rate': 2e-05, 'epoch': 48.02}


 96%|█████████▋| 9726/10100 [18:24<00:43,  8.62it/s]

{'loss': 0.1645, 'learning_rate': 2e-05, 'epoch': 48.14}


 97%|█████████▋| 9751/10100 [18:27<00:37,  9.20it/s]

{'loss': 0.1975, 'learning_rate': 2e-05, 'epoch': 48.27}


 97%|█████████▋| 9776/10100 [18:29<00:34,  9.27it/s]

{'loss': 0.1741, 'learning_rate': 2e-05, 'epoch': 48.39}


 97%|█████████▋| 9800/10100 [18:32<00:32,  9.30it/s]Saving model checkpoint to ./training_output/finetuning/TAPT\checkpoint-9800
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-9800\TAPT_sci-erc_0\adapter_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-9800\TAPT_sci-erc_0\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-9800\TAPT_sci-erc_0\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-9800\TAPT_sci-erc_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-9800\TAPT_sci-erc_0\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-9800\TAPT_sci-erc_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-9800\TAPT_sci-erc_0\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-9800\TAPT_sci-erc_0\pytorch_model_head.bin
 97

{'loss': 0.1739, 'learning_rate': 2e-05, 'epoch': 48.51}


 97%|█████████▋| 9826/10100 [18:35<00:29,  9.14it/s]

{'loss': 0.2203, 'learning_rate': 2e-05, 'epoch': 48.64}


 98%|█████████▊| 9851/10100 [18:38<00:27,  9.20it/s]

{'loss': 0.1973, 'learning_rate': 2e-05, 'epoch': 48.76}


 98%|█████████▊| 9876/10100 [18:40<00:24,  9.17it/s]

{'loss': 0.1252, 'learning_rate': 2e-05, 'epoch': 48.89}


 98%|█████████▊| 9897/10100 [18:43<00:21,  9.29it/s]***** Running Evaluation *****
  Num examples = 455
  Batch size = 16
                                                    
 98%|█████████▊| 9899/10100 [18:44<01:30,  2.21it/s]

{'eval_loss': 0.4998268783092499, 'eval_f1': 0.8095116139931723, 'eval_runtime': 1.5564, 'eval_samples_per_second': 292.338, 'eval_steps_per_second': 18.633, 'epoch': 49.0}


 98%|█████████▊| 9900/10100 [18:44<01:13,  2.73it/s]Saving model checkpoint to ./training_output/finetuning/TAPT\checkpoint-9900
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-9900\TAPT_sci-erc_0\adapter_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-9900\TAPT_sci-erc_0\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-9900\TAPT_sci-erc_0\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-9900\TAPT_sci-erc_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-9900\TAPT_sci-erc_0\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-9900\TAPT_sci-erc_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-9900\TAPT_sci-erc_0\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-9900\TAPT_sci-erc_0\pytorch_model_head.bin


{'loss': 0.1516, 'learning_rate': 2e-05, 'epoch': 49.01}


 98%|█████████▊| 9926/10100 [18:47<00:18,  9.19it/s]

{'loss': 0.1883, 'learning_rate': 2e-05, 'epoch': 49.13}


 99%|█████████▊| 9951/10100 [18:50<00:16,  9.19it/s]

{'loss': 0.1324, 'learning_rate': 2e-05, 'epoch': 49.26}


 99%|█████████▉| 9976/10100 [18:53<00:13,  9.14it/s]

{'loss': 0.1158, 'learning_rate': 2e-05, 'epoch': 49.38}


 99%|█████████▉| 10000/10100 [18:55<00:10,  9.28it/s]Saving model checkpoint to ./training_output/finetuning/TAPT\checkpoint-10000
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-10000\TAPT_sci-erc_0\adapter_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-10000\TAPT_sci-erc_0\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-10000\TAPT_sci-erc_0\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-10000\TAPT_sci-erc_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-10000\TAPT_sci-erc_0\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-10000\TAPT_sci-erc_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-10000\TAPT_sci-erc_0\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-10000\TAPT_sci-erc_0\pytorch_model_he

{'loss': 0.1571, 'learning_rate': 2e-05, 'epoch': 49.5}


 99%|█████████▉| 10026/10100 [18:58<00:08,  9.17it/s]

{'loss': 0.1625, 'learning_rate': 2e-05, 'epoch': 49.63}


100%|█████████▉| 10051/10100 [19:01<00:05,  9.18it/s]

{'loss': 0.1399, 'learning_rate': 2e-05, 'epoch': 49.75}


100%|█████████▉| 10076/10100 [19:03<00:02,  9.23it/s]

{'loss': 0.2008, 'learning_rate': 2e-05, 'epoch': 49.88}


100%|██████████| 10100/10100 [19:06<00:00,  9.36it/s]Saving model checkpoint to ./training_output/finetuning/TAPT\checkpoint-10100
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-10100\TAPT_sci-erc_0\adapter_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-10100\TAPT_sci-erc_0\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-10100\TAPT_sci-erc_0\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-10100\TAPT_sci-erc_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-10100\TAPT_sci-erc_0\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-10100\TAPT_sci-erc_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\checkpoint-10100\TAPT_sci-erc_0\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\checkpoint-10100\TAPT_sci-erc_0\pytorch_model_he

{'loss': 0.0939, 'learning_rate': 2e-05, 'epoch': 50.0}


                                                     
100%|██████████| 10100/10100 [19:08<00:00,  9.36it/s]

Training completed. Do not forget to share your model on huggingface.co/models =)


100%|██████████| 10100/10100 [19:08<00:00,  8.80it/s]
***** Running Evaluation *****
  Num examples = 974
  Batch size = 16


{'eval_loss': 0.5171557664871216, 'eval_f1': 0.8222261813681893, 'eval_runtime': 1.5634, 'eval_samples_per_second': 291.028, 'eval_steps_per_second': 18.549, 'epoch': 50.0}
{'train_runtime': 1148.1544, 'train_samples_per_second': 140.181, 'train_steps_per_second': 8.797, 'train_loss': 0.5226877824622805, 'epoch': 50.0}


100%|██████████| 61/61 [00:03<00:00, 18.78it/s]
Configuration saved in ./training_output/finetuning/TAPT\TAPT_sci-erc_0\adapter_config.json
Module weights saved in ./training_output/finetuning/TAPT\TAPT_sci-erc_0\pytorch_adapter.bin
Configuration saved in ./training_output/finetuning/TAPT\TAPT_sci-erc_0\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\TAPT_sci-erc_0\pytorch_model_head.bin
Configuration saved in ./training_output/finetuning/TAPT\TAPT_sci-erc_0\head_config.json
Module weights saved in ./training_output/finetuning/TAPT\TAPT_sci-erc_0\pytorch_model_head.bin


Only Finetuning

In [None]:
training_args = TrainingArguments(
    learning_rate=2e-5,
    num_train_epochs=50,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    logging_steps=100,
    output_dir="./training_output/finetuning/No_Pretrain",
    overwrite_output_dir=True,
    # The next line is important to ensure the dataset labels are properly passed to the model
    remove_unused_columns=True,
    evaluation_strategy = 'steps',
    # load_best_model_at_end = True,
    save_steps = 100,
    lr_scheduler_type = 'constant',
)

using `logging_steps` to initialize `eval_steps` to 100
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).


In [None]:
finetuning_loop(num_models = 1, 
                 training_args = training_args, 
                 dataset = scierc_dataset_finetuning,  
                 adapter_name = "sci-erc",
                 load_adapter = False,
                 num_labels = num_of_labels)