# Parameter-Efficient Fine-Tuning (PEFT) on JuL lyrics

## Installation

In [3]:
! pip install datasets transformers accelerate evaluate seqeval # HuggingFace 🤗
! pip install -U adapter-transformers # Adapter-Transformers
! pip install sentencepiece # Required for Camembert-ner (slow tokenizer)
! apt install git-lfs # To upload fine-tuned model to HuggingFace Hub

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Reading package lists... Done
Building dependency tree       
Reading state information... Done
git-lfs is already the newest version (2.9.2-1).
0 upgraded, 0 newly installed, 0 to remove and 24 not upgraded.


In [4]:
from huggingface_hub import notebook_login

notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

## Libraries

In [5]:
from datasets import DatasetDict, Dataset
import transformers
from transformers import (AutoTokenizer, 
                          RobertaForTokenClassification, 
                          TrainingArguments, 
                          AdapterTrainer, 
                          DataCollatorForTokenClassification,
                          pipeline,
                          TokenClassificationPipeline)
from transformers.adapters import PrefixTuningConfig, LoRAConfig, AdapterConfig
from transformers.integrations import TensorBoardCallback
import numpy as np
import evaluate

## Functions

In [6]:
def iob_to_dataset(lines, split):
  ''' 
  Function to convert each line of a txt file in the IOB format 
  into the format expected by camembert-ner and HuggingFace dataset 
  '''
  # Define tag to ID mapping
  tag2id = {'O': 0, 'LOC': 1, 'PER': 2, 'MISC': 3, 'ORG': 4}

  # Group IOB-formatted lines into sentences
  sentences = []
  sentence = []
  for line in lines:
    line = line.strip()
    if line:
      token, tag = line.split()
      sentence.append((token, tag))
    else:
      sentences.append(sentence)
      sentence = []
  if sentence:
    sentences.append(sentence)

  # Merge tokens and NER tags for each sentence
  tokens = []
  ner_tags = []
  for sentence in sentences:
    sentence_tokens, sentence_tags = zip(*sentence)
    tokens.append(' '.join(sentence_tokens))
    # Remove IOB tag prefixes for camembert-ner
    ner_tags.append([tag2id[tag.replace('B-', '').replace('I-', '')] for tag in sentence_tags])

  # Create a dictionary
  dataset_dict = {"id": list(range(len(tokens))),
                  "tokens": tokens,
                  "ner_tags": ner_tags}

  # Return the dataset as a Hugging Face Dataset object
  return Dataset.from_dict(dataset_dict)



def tokenize_and_align_labels(examples):
  '''
  Function to align labels with token ids
  '''
  label_all_tokens = True
  tokenized_inputs = tokenizer(examples["tokens"], 
                               truncation=True)

  labels = []
  for i, label in enumerate(examples["ner_tags"]):
    word_ids = tokenized_inputs.word_ids(batch_index=i)
    previous_word_idx = None
    label_ids = []
    for word_idx in word_ids:
      # Set labels of special tokens to -100 (index ignored by PyTorch)
      if word_idx is None:
        label_ids.append(-100)
      # Set label for the first token of each word
      elif word_idx != previous_word_idx:
        label_ids.append(label[word_idx])
      # Set the label to either the current label or -100
      else:
        label_ids.append(label[word_idx] if label_all_tokens else -100)
      previous_word_idx = word_idx

    labels.append(label_ids)

  tokenized_inputs["labels"] = labels
  return tokenized_inputs



def compute_metrics(p):
  '''
  Function to compute metrics on predictions
  '''
  predictions, labels = p
  predictions = np.argmax(predictions, axis=2)

  # camembert-ner needs tags without prefixes but seqeval needs it so we add "I-"
  label_list = ['O', 'I-LOC', 'I-PER', 'I-MISC', 'I-ORG']

  # Remove ignored index (special tokens)
  true_predictions = [
      [label_list[p] for (p, l) in zip(prediction, label) if l != -100]
      for prediction, label in zip(predictions, labels)]
  true_labels = [
      [label_list[l] for (p, l) in zip(prediction, label) if l != -100]
      for prediction, label in zip(predictions, labels)]

  results = seqeval.compute(predictions=true_predictions, references=true_labels)
  return results

## Load files

In [7]:
#Import file
from google.colab import files

uploaded = files.upload()

for fn in uploaded.keys():
  print('User uploaded file "{name}" with length {length} bytes'.format(
      name=fn, length=len(uploaded[fn])))

Saving data.txt to data.txt
User uploaded file "data.txt" with length 737193 bytes


In [8]:
# Load data
with open("/content/data.txt", "r", encoding="utf-8") as f:
    data_lines = f.readlines()

## Preprocessing

In [9]:
# Convert IOB formatted file into the format required
data = iob_to_dataset(data_lines, "data")

# Create a DatasetDict object
dataset = DatasetDict({"data": data})

# Split data into train, valid and test sets
ds_train_devtest = dataset["data"].train_test_split(test_size=0.4, train_size=0.6, seed=7)
ds_devtest = ds_train_devtest["test"].train_test_split(test_size=0.5, seed=7)

datasets = DatasetDict({"train": ds_train_devtest["train"], # 60%
                        "valid": ds_devtest["train"], # 20%
                        "test": ds_devtest["test"]}) # 20%

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained("Jean-Baptiste/camembert-ner")

# Tokenize and align labels of train, validation and test sets
tokenized_datasets = datasets.map(tokenize_and_align_labels, batched=True)
tokenized_datasets

Map:   0%|          | 0/6518 [00:00<?, ? examples/s]

Map:   0%|          | 0/2173 [00:00<?, ? examples/s]

Map:   0%|          | 0/2173 [00:00<?, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['id', 'tokens', 'ner_tags', 'input_ids', 'attention_mask', 'labels'],
        num_rows: 6518
    })
    valid: Dataset({
        features: ['id', 'tokens', 'ner_tags', 'input_ids', 'attention_mask', 'labels'],
        num_rows: 2173
    })
    test: Dataset({
        features: ['id', 'tokens', 'ner_tags', 'input_ids', 'attention_mask', 'labels'],
        num_rows: 2173
    })
})

## PEFT with Prefix tuning

In [10]:
# Label list
label_list = ['O', 'LOC', 'PER', 'MISC', 'ORG'] # {'O': 0, 'LOC': 1, 'PER': 2, 'MISC': 3, 'ORG': 4}

# Load model
model = RobertaForTokenClassification.from_pretrained("Jean-Baptiste/camembert-ner", num_labels=5)

# Add a new adapter to it
model.add_adapter("prefix_jul", config=PrefixTuningConfig(prefix_length=50))

# Activate the adapter
model.train_adapter("prefix_jul")

# Define training arguments
args = TrainingArguments("peft-prefix-jul",
                         learning_rate=2e-4,
                         num_train_epochs=10,
                         per_device_train_batch_size=16,
                         per_device_eval_batch_size=16,
                         push_to_hub=True)

# Batch processed examples together while applying padding to make them the same size
data_collator = DataCollatorForTokenClassification(tokenizer=tokenizer)

# Load seqeval metric commonly used to evaluate results on CONLL
seqeval = evaluate.load("seqeval")

# Load trainer
trainer = AdapterTrainer(model,
                         args,
                         train_dataset=tokenized_datasets["train"],
                         eval_dataset=tokenized_datasets["valid"],
                         data_collator=data_collator,
                         tokenizer=tokenizer,
                         compute_metrics=compute_metrics,
                         callbacks=[TensorBoardCallback()])

You are using a model of type camembert to instantiate a model of type roberta. This is not supported for all configurations of models and can yield errors.


Downloading pytorch_model.bin:   0%|          | 0.00/440M [00:00<?, ?B/s]

Downloading builder script:   0%|          | 0.00/6.34k [00:00<?, ?B/s]

You are adding a <class 'transformers.integrations.TensorBoardCallback'> to the callbacks of this Trainer, but there is already one. The currentlist of callbacks is
:DefaultFlowCallback
TensorBoardCallback
AdapterTrainerCallback
Cloning https://huggingface.co/fgiauna/peft-prefix-jul into local empty directory.


Download file prefix_jul/pytorch_adapter.bin:   0%|          | 16.5k/37.7M [00:00<?, ?B/s]

Download file prefix_jul/pytorch_model_head.bin: 100%|##########| 16.1k/16.1k [00:00<?, ?B/s]

Download file runs/May15_07-22-52_a7551e05f324/events.out.tfevents.1684135389.a7551e05f324.265.2: 100%|#######…

Download file runs/May15_07-22-52_a7551e05f324/events.out.tfevents.1684135389.a7551e05f324.265.0: 100%|#######…

Download file runs/May13_16-55-32_08c80963a5b7/events.out.tfevents.1683996948.08c80963a5b7.521.12: 100%|######…

Download file runs/May13_16-55-32_08c80963a5b7/events.out.tfevents.1683996948.08c80963a5b7.521.14: 100%|######…

Download file sentencepiece.bpe.model:   1%|1         | 8.00k/792k [00:00<?, ?B/s]

Download file runs/May13_16-55-32_08c80963a5b7/1683996948.1100657/events.out.tfevents.1683996948.08c80963a5b7.…

Clean file prefix_jul/pytorch_model_head.bin:   6%|6         | 1.00k/16.1k [00:00<?, ?B/s]

Clean file runs/May15_07-22-52_a7551e05f324/events.out.tfevents.1684135389.a7551e05f324.265.2:  16%|#6        …

Clean file runs/May15_07-22-52_a7551e05f324/events.out.tfevents.1684135389.a7551e05f324.265.0:  16%|#6        …

Clean file runs/May13_16-55-32_08c80963a5b7/events.out.tfevents.1683996948.08c80963a5b7.521.12:  16%|#6       …

Download file training_args.bin: 100%|##########| 3.37k/3.37k [00:00<?, ?B/s]

Clean file runs/May13_16-55-32_08c80963a5b7/events.out.tfevents.1683996948.08c80963a5b7.521.14:  16%|#6       …

Clean file runs/May13_16-55-32_08c80963a5b7/1683996948.1100657/events.out.tfevents.1683996948.08c80963a5b7.521…

Clean file training_args.bin:  30%|##9       | 1.00k/3.37k [00:00<?, ?B/s]

Download file runs/May15_07-22-52_a7551e05f324/1684135389.5735202/events.out.tfevents.1684135389.a7551e05f324.…

Clean file runs/May15_07-22-52_a7551e05f324/1684135389.5735202/events.out.tfevents.1684135389.a7551e05f324.265…

Download file runs/May15_07-22-52_a7551e05f324/events.out.tfevents.1684135764.a7551e05f324.265.4: 100%|#######…

Download file runs/May13_16-55-32_08c80963a5b7/events.out.tfevents.1683997337.08c80963a5b7.521.17: 100%|######…

Download file runs/May13_16-55-32_08c80963a5b7/1683996948.1042335/events.out.tfevents.1683996948.08c80963a5b7.…

Clean file runs/May13_16-55-32_08c80963a5b7/events.out.tfevents.1683997337.08c80963a5b7.521.17: 100%|#########…

Clean file runs/May15_07-22-52_a7551e05f324/events.out.tfevents.1684135764.a7551e05f324.265.4: 100%|##########…

Clean file runs/May13_16-55-32_08c80963a5b7/1683996948.1042335/events.out.tfevents.1683996948.08c80963a5b7.521…

Download file runs/May15_07-22-52_a7551e05f324/1684135389.5680196/events.out.tfevents.1684135389.a7551e05f324.…

Clean file runs/May15_07-22-52_a7551e05f324/1684135389.5680196/events.out.tfevents.1684135389.a7551e05f324.265…

Clean file sentencepiece.bpe.model:   0%|          | 1.00k/792k [00:00<?, ?B/s]

Download file runs/May15_07-22-52_a7551e05f324/events.out.tfevents.1684135764.a7551e05f324.265.5: 100%|#######…

Clean file runs/May15_07-22-52_a7551e05f324/events.out.tfevents.1684135764.a7551e05f324.265.5: 100%|##########…

Download file runs/May13_16-55-32_08c80963a5b7/events.out.tfevents.1683997336.08c80963a5b7.521.16: 100%|######…

Clean file runs/May13_16-55-32_08c80963a5b7/events.out.tfevents.1683997336.08c80963a5b7.521.16: 100%|#########…

Clean file prefix_jul/pytorch_adapter.bin:   0%|          | 1.00k/37.7M [00:00<?, ?B/s]

In [11]:
# Train 
trainer.train()

The following columns in the training set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: ner_tags, tokens, id. If ner_tags, tokens, id are not expected by `RobertaForTokenClassification.forward`,  you can safely ignore this message.
***** Running training *****
  Num examples = 6518
  Num Epochs = 10
  Instantaneous batch size per device = 16
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 1
  Total optimization steps = 4080
  Number of trainable parameters = 9891589
You're using a CamembertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Step,Training Loss
500,0.1152
1000,0.0726
1500,0.058
2000,0.0464
2500,0.0374
3000,0.0269
3500,0.0221
4000,0.0154


Saving model checkpoint to peft-prefix-jul/checkpoint-500
Configuration saved in peft-prefix-jul/checkpoint-500/prefix_jul/adapter_config.json
Module weights saved in peft-prefix-jul/checkpoint-500/prefix_jul/pytorch_adapter.bin
Configuration saved in peft-prefix-jul/checkpoint-500/prefix_jul/head_config.json
Module weights saved in peft-prefix-jul/checkpoint-500/prefix_jul/pytorch_model_head.bin
tokenizer config file saved in peft-prefix-jul/checkpoint-500/tokenizer_config.json
Special tokens file saved in peft-prefix-jul/checkpoint-500/special_tokens_map.json
tokenizer config file saved in peft-prefix-jul/tokenizer_config.json
Special tokens file saved in peft-prefix-jul/special_tokens_map.json
Saving model checkpoint to peft-prefix-jul/checkpoint-1000
Configuration saved in peft-prefix-jul/checkpoint-1000/prefix_jul/adapter_config.json
Module weights saved in peft-prefix-jul/checkpoint-1000/prefix_jul/pytorch_adapter.bin
Configuration saved in peft-prefix-jul/checkpoint-1000/prefix_

TrainOutput(global_step=4080, training_loss=0.048616089452715484, metrics={'train_runtime': 374.2341, 'train_samples_per_second': 174.169, 'train_steps_per_second': 10.902, 'total_flos': 1050137907160740.0, 'train_loss': 0.048616089452715484, 'epoch': 10.0})

In [12]:
# Evaluate
trainer.evaluate(eval_dataset=tokenized_datasets["test"])

The following columns in the evaluation set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: ner_tags, tokens, id. If ner_tags, tokens, id are not expected by `RobertaForTokenClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 2173
  Batch size = 16


Trainer is attempting to log a value of "{'precision': 0.711864406779661, 'recall': 0.7777777777777778, 'f1': 0.7433628318584071, 'number': 216}" of type <class 'dict'> for key "eval/LOC" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'precision': 0.6956521739130435, 'recall': 0.4, 'f1': 0.507936507936508, 'number': 40}" of type <class 'dict'> for key "eval/MISC" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'precision': 0.8097560975609757, 'recall': 0.83, 'f1': 0.8197530864197532, 'number': 200}" of type <class 'dict'> for key "eval/ORG" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'precision': 0.8260869565217391, 'recall': 0.7755102040816326, 'f1': 0.8, 'number': 196}" of ty

{'eval_loss': 0.0857028067111969,
 'eval_LOC': {'precision': 0.711864406779661,
  'recall': 0.7777777777777778,
  'f1': 0.7433628318584071,
  'number': 216},
 'eval_MISC': {'precision': 0.6956521739130435,
  'recall': 0.4,
  'f1': 0.507936507936508,
  'number': 40},
 'eval_ORG': {'precision': 0.8097560975609757,
  'recall': 0.83,
  'f1': 0.8197530864197532,
  'number': 200},
 'eval_PER': {'precision': 0.8260869565217391,
  'recall': 0.7755102040816326,
  'f1': 0.8,
  'number': 196},
 'eval_overall_precision': 0.7746913580246914,
 'eval_overall_recall': 0.7699386503067485,
 'eval_overall_f1': 0.7723076923076923,
 'eval_overall_accuracy': 0.9821653175089496,
 'eval_runtime': 5.9091,
 'eval_samples_per_second': 367.738,
 'eval_steps_per_second': 23.015,
 'epoch': 10.0}

In [13]:
# Upload the result of the training to HuggingFace Hub
trainer.push_to_hub(commit_message="Training complete")

Saving model checkpoint to peft-prefix-jul
Configuration saved in peft-prefix-jul/prefix_jul/adapter_config.json
Module weights saved in peft-prefix-jul/prefix_jul/pytorch_adapter.bin
Configuration saved in peft-prefix-jul/prefix_jul/head_config.json
Module weights saved in peft-prefix-jul/prefix_jul/pytorch_model_head.bin
tokenizer config file saved in peft-prefix-jul/tokenizer_config.json
Special tokens file saved in peft-prefix-jul/special_tokens_map.json


Upload file prefix_jul/pytorch_adapter.bin:   0%|          | 1.00/37.7M [00:00<?, ?B/s]

Upload file runs/May15_08-33-06_228946966f8c/events.out.tfevents.1684139993.228946966f8c.1971.4:   0%|        …

Upload file prefix_jul/pytorch_model_head.bin:   0%|          | 1.00/16.1k [00:00<?, ?B/s]

Upload file runs/May15_08-33-06_228946966f8c/events.out.tfevents.1684139993.228946966f8c.1971.5:   0%|        …

Upload file runs/May15_08-33-06_228946966f8c/events.out.tfevents.1684139613.228946966f8c.1971.2:   0%|        …

Upload file runs/May15_08-33-06_228946966f8c/events.out.tfevents.1684139613.228946966f8c.1971.0:   0%|        …

To https://huggingface.co/fgiauna/peft-prefix-jul
   27a5b1a..3fccc1c  main -> main

   27a5b1a..3fccc1c  main -> main

Dropping the following result as it does not have all the necessary fields:
{'task': {'name': 'Token Classification', 'type': 'token-classification'}}
To https://huggingface.co/fgiauna/peft-prefix-jul
   3fccc1c..2cd2e3f  main -> main

   3fccc1c..2cd2e3f  main -> main



'https://huggingface.co/fgiauna/peft-prefix-jul/commit/3fccc1c5a9e81a9af80f0eeeb04e88c78b4bafea'

## PEFT with LoRA

In [None]:
# Label list
label_list = ['O', 'PER', 'ORG', 'LOC', 'MISC'] # 'O': 0, 'PER': 1, 'ORG': 2, 'LOC': 3, 'MISC': 4

# Load model
model = RobertaForTokenClassification.from_pretrained("Jean-Baptiste/camembert-ner", num_labels=5)

# Add a new adapter to it
model.add_adapter("lora_jul", config=LoRAConfig(r=8, alpha=8))

# Activate the adapter
model.train_adapter("lora_jul")

# Define training arguments
args = TrainingArguments("peft-lora-jul",
                         learning_rate=2e-4,
                         num_train_epochs=10,
                         per_device_train_batch_size=16,
                         per_device_eval_batch_size=16,
                         push_to_hub=True)

# Batch processed examples together while applying padding to make them the same size
data_collator = DataCollatorForTokenClassification(tokenizer=tokenizer)

# Load seqeval metric commonly used to evaluate results on CONLL
seqeval = evaluate.load("seqeval")

# Load trainer
trainer = AdapterTrainer(model,
                         args,
                         train_dataset=tokenized_datasets["train"],
                         eval_dataset=tokenized_datasets["valid"],
                         data_collator=data_collator,
                         tokenizer=tokenizer,
                         compute_metrics=compute_metrics,
                         callbacks=[TensorBoardCallback()])

loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--Jean-Baptiste--camembert-ner/snapshots/cc63721791a6e1d60f4764997bbb311667ec75d8/config.json
You are using a model of type camembert to instantiate a model of type roberta. This is not supported for all configurations of models and can yield errors.
Model config RobertaConfig {
  "_name_or_path": "camembert-base",
  "architectures": [
    "CamembertForTokenClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 5,
  "classifier_dropout": null,
  "eos_token_id": 6,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "O",
    "1": "I-LOC",
    "2": "I-PER",
    "3": "I-MISC",
    "4": "I-ORG"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "I-LOC": 1,
    "I-MISC": 3,
    "I-ORG": 4,
    "I-PER": 2,
    "O": 0
  },
  "layer_norm_eps": 1e-05,
  "max_position_embe

In [None]:
# Train 
trainer.train()

The following columns in the training set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: ner_tags, id, tokens. If ner_tags, id, tokens are not expected by `RobertaForTokenClassification.forward`,  you can safely ignore this message.
***** Running training *****
  Num examples = 6518
  Num Epochs = 10
  Instantaneous batch size per device = 16
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 1
  Total optimization steps = 4080
  Number of trainable parameters = 298757


Step,Training Loss
500,0.1535
1000,0.0973
1500,0.0789
2000,0.0735
2500,0.0659
3000,0.0613
3500,0.0583
4000,0.0569


Saving model checkpoint to peft-lora-jul/checkpoint-500
Configuration saved in peft-lora-jul/checkpoint-500/lora_jul/adapter_config.json
Module weights saved in peft-lora-jul/checkpoint-500/lora_jul/pytorch_adapter.bin
Configuration saved in peft-lora-jul/checkpoint-500/lora_jul/head_config.json
Module weights saved in peft-lora-jul/checkpoint-500/lora_jul/pytorch_model_head.bin
tokenizer config file saved in peft-lora-jul/checkpoint-500/tokenizer_config.json
Special tokens file saved in peft-lora-jul/checkpoint-500/special_tokens_map.json
tokenizer config file saved in peft-lora-jul/tokenizer_config.json
Special tokens file saved in peft-lora-jul/special_tokens_map.json
Saving model checkpoint to peft-lora-jul/checkpoint-1000
Configuration saved in peft-lora-jul/checkpoint-1000/lora_jul/adapter_config.json
Module weights saved in peft-lora-jul/checkpoint-1000/lora_jul/pytorch_adapter.bin
Configuration saved in peft-lora-jul/checkpoint-1000/lora_jul/head_config.json
Module weights save

TrainOutput(global_step=4080, training_loss=0.08026144633106157, metrics={'train_runtime': 334.2136, 'train_samples_per_second': 195.025, 'train_steps_per_second': 12.208, 'total_flos': 944421365587620.0, 'train_loss': 0.08026144633106157, 'epoch': 10.0})

In [None]:
# Train 
trainer.evaluate(eval_dataset=tokenized_datasets["test"])

The following columns in the evaluation set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: ner_tags, id, tokens. If ner_tags, id, tokens are not expected by `RobertaForTokenClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 2173
  Batch size = 16


Trainer is attempting to log a value of "{'precision': 0.5887445887445888, 'recall': 0.6296296296296297, 'f1': 0.6085011185682326, 'number': 216}" of type <class 'dict'> for key "eval/LOC" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'precision': 0.6111111111111112, 'recall': 0.275, 'f1': 0.3793103448275862, 'number': 40}" of type <class 'dict'> for key "eval/MISC" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'precision': 0.7004830917874396, 'recall': 0.725, 'f1': 0.7125307125307125, 'number': 200}" of type <class 'dict'> for key "eval/ORG" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'precision': 0.7540106951871658, 'recall': 0.7193877551020408, 'f1': 0.7362924281984334, '

{'eval_loss': 0.0816732719540596,
 'eval_LOC': {'precision': 0.5887445887445888,
  'recall': 0.6296296296296297,
  'f1': 0.6085011185682326,
  'number': 216},
 'eval_MISC': {'precision': 0.6111111111111112,
  'recall': 0.275,
  'f1': 0.3793103448275862,
  'number': 40},
 'eval_ORG': {'precision': 0.7004830917874396,
  'recall': 0.725,
  'f1': 0.7125307125307125,
  'number': 200},
 'eval_PER': {'precision': 0.7540106951871658,
  'recall': 0.7193877551020408,
  'f1': 0.7362924281984334,
  'number': 196},
 'eval_overall_precision': 0.6734059097978227,
 'eval_overall_recall': 0.6641104294478528,
 'eval_overall_f1': 0.6687258687258687,
 'eval_overall_accuracy': 0.9771664462863224,
 'eval_runtime': 4.6816,
 'eval_samples_per_second': 464.16,
 'eval_steps_per_second': 29.05,
 'epoch': 10.0}

In [None]:
# Upload the result of the training to HuggingFace Hub
trainer.push_to_hub(commit_message="Training complete")

Saving model checkpoint to peft-lora-jul
Configuration saved in peft-lora-jul/lora_jul/adapter_config.json
Module weights saved in peft-lora-jul/lora_jul/pytorch_adapter.bin
Configuration saved in peft-lora-jul/lora_jul/head_config.json
Module weights saved in peft-lora-jul/lora_jul/pytorch_model_head.bin
tokenizer config file saved in peft-lora-jul/tokenizer_config.json
Special tokens file saved in peft-lora-jul/special_tokens_map.json


Upload file lora_jul/pytorch_adapter.bin:   3%|2         | 32.0k/1.14M [00:00<?, ?B/s]

Upload file runs/May13_17-05-05_08c80963a5b7/events.out.tfevents.1683997856.08c80963a5b7.521.23: 100%|########…

Upload file runs/May13_17-05-05_08c80963a5b7/events.out.tfevents.1683997517.08c80963a5b7.521.18: 100%|########…

Upload file lora_jul/pytorch_model_head.bin: 100%|##########| 16.1k/16.1k [00:00<?, ?B/s]

Upload file runs/May13_17-05-05_08c80963a5b7/events.out.tfevents.1683997517.08c80963a5b7.521.20: 100%|########…

To https://huggingface.co/fgiauna/peft-lora-jul
   0f3bde2..bf64a25  main -> main

   0f3bde2..bf64a25  main -> main



Upload file runs/May13_17-05-05_08c80963a5b7/events.out.tfevents.1683997856.08c80963a5b7.521.22: 100%|########…

Dropping the following result as it does not have all the necessary fields:
{'task': {'name': 'Token Classification', 'type': 'token-classification'}}
To https://huggingface.co/fgiauna/peft-lora-jul
   bf64a25..00183c3  main -> main

   bf64a25..00183c3  main -> main



'https://huggingface.co/fgiauna/peft-lora-jul/commit/bf64a25ab5d9e398965c8f1ffe29d2f3674b13bd'

## PEFT with Adapter

In [None]:
# Label list
label_list = ['O', 'PER', 'ORG', 'LOC', 'MISC'] # 'O': 0, 'PER': 1, 'ORG': 2, 'LOC': 3, 'MISC': 4

# Load model
model = RobertaForTokenClassification.from_pretrained("Jean-Baptiste/camembert-ner", num_labels=5)

# Add a new adapter to it
model.add_adapter("adapter_jul", config=AdapterConfig(mh_adapter=True,
                                                      output_adapter=True,
                                                      reduction_factor=12, 
                                                      non_linearity='relu'))

# Activate the adapter
model.train_adapter("adapter_jul")

# Define training arguments
args = TrainingArguments("peft-adapter-jul",
                         learning_rate=1e-4,
                         num_train_epochs=10,
                         per_device_train_batch_size=16,
                         per_device_eval_batch_size=16,
                         push_to_hub=True)

# Batch processed examples together while applying padding to make them the same size
data_collator = DataCollatorForTokenClassification(tokenizer=tokenizer)

# Load seqeval metric commonly used to evaluate results on CONLL
seqeval = evaluate.load("seqeval")

# Load trainer
trainer = AdapterTrainer(model,
                         args,
                         train_dataset=tokenized_datasets["train"],
                         eval_dataset=tokenized_datasets["valid"],
                         data_collator=data_collator,
                         tokenizer=tokenizer,
                         compute_metrics=compute_metrics,
                         callbacks=[TensorBoardCallback()])

loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--Jean-Baptiste--camembert-ner/snapshots/cc63721791a6e1d60f4764997bbb311667ec75d8/config.json
You are using a model of type camembert to instantiate a model of type roberta. This is not supported for all configurations of models and can yield errors.
Model config RobertaConfig {
  "_name_or_path": "camembert-base",
  "architectures": [
    "CamembertForTokenClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 5,
  "classifier_dropout": null,
  "eos_token_id": 6,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "O",
    "1": "I-LOC",
    "2": "I-PER",
    "3": "I-MISC",
    "4": "I-ORG"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "I-LOC": 1,
    "I-MISC": 3,
    "I-ORG": 4,
    "I-PER": 2,
    "O": 0
  },
  "layer_norm_eps": 1e-05,
  "max_position_embe

In [None]:
# Train 
trainer.train()

The following columns in the training set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: ner_tags, id, tokens. If ner_tags, id, tokens are not expected by `RobertaForTokenClassification.forward`,  you can safely ignore this message.
***** Running training *****
  Num examples = 6518
  Num Epochs = 10
  Instantaneous batch size per device = 16
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 1
  Total optimization steps = 4080
  Number of trainable parameters = 2383109


Step,Training Loss
500,0.1231
1000,0.0729
1500,0.0557
2000,0.0459
2500,0.0389
3000,0.0328
3500,0.0274
4000,0.0252


Saving model checkpoint to peft-adapter-jul/checkpoint-500
Configuration saved in peft-adapter-jul/checkpoint-500/adapter_jul/adapter_config.json
Module weights saved in peft-adapter-jul/checkpoint-500/adapter_jul/pytorch_adapter.bin
Configuration saved in peft-adapter-jul/checkpoint-500/adapter_jul/head_config.json
Module weights saved in peft-adapter-jul/checkpoint-500/adapter_jul/pytorch_model_head.bin
tokenizer config file saved in peft-adapter-jul/checkpoint-500/tokenizer_config.json
Special tokens file saved in peft-adapter-jul/checkpoint-500/special_tokens_map.json
tokenizer config file saved in peft-adapter-jul/tokenizer_config.json
Special tokens file saved in peft-adapter-jul/special_tokens_map.json
Saving model checkpoint to peft-adapter-jul/checkpoint-1000
Configuration saved in peft-adapter-jul/checkpoint-1000/adapter_jul/adapter_config.json
Module weights saved in peft-adapter-jul/checkpoint-1000/adapter_jul/pytorch_adapter.bin
Configuration saved in peft-adapter-jul/chec

TrainOutput(global_step=4080, training_loss=0.05232211637730692, metrics={'train_runtime': 338.7567, 'train_samples_per_second': 192.409, 'train_steps_per_second': 12.044, 'total_flos': 967484011787940.0, 'train_loss': 0.05232211637730692, 'epoch': 10.0})

In [None]:
# Train 
trainer.evaluate(eval_dataset=tokenized_datasets["test"])

The following columns in the evaluation set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: ner_tags, id, tokens. If ner_tags, id, tokens are not expected by `RobertaForTokenClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 2173
  Batch size = 16


Trainer is attempting to log a value of "{'precision': 0.6808510638297872, 'recall': 0.7407407407407407, 'f1': 0.7095343680709535, 'number': 216}" of type <class 'dict'> for key "eval/LOC" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'precision': 0.5416666666666666, 'recall': 0.325, 'f1': 0.40624999999999994, 'number': 40}" of type <class 'dict'> for key "eval/MISC" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'precision': 0.75, 'recall': 0.81, 'f1': 0.7788461538461539, 'number': 200}" of type <class 'dict'> for key "eval/ORG" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'precision': 0.7989130434782609, 'recall': 0.75, 'f1': 0.7736842105263159, 'number': 196}" of type <clas

{'eval_loss': 0.0857548713684082,
 'eval_LOC': {'precision': 0.6808510638297872,
  'recall': 0.7407407407407407,
  'f1': 0.7095343680709535,
  'number': 216},
 'eval_MISC': {'precision': 0.5416666666666666,
  'recall': 0.325,
  'f1': 0.40624999999999994,
  'number': 40},
 'eval_ORG': {'precision': 0.75,
  'recall': 0.81,
  'f1': 0.7788461538461539,
  'number': 200},
 'eval_PER': {'precision': 0.7989130434782609,
  'recall': 0.75,
  'f1': 0.7736842105263159,
  'number': 196},
 'eval_overall_precision': 0.7314112291350531,
 'eval_overall_recall': 0.7392638036809815,
 'eval_overall_f1': 0.7353165522501907,
 'eval_overall_accuracy': 0.9799400135453284,
 'eval_runtime': 4.6526,
 'eval_samples_per_second': 467.048,
 'eval_steps_per_second': 29.231,
 'epoch': 10.0}

In [None]:
# Upload the result of the training to HuggingFace Hub
trainer.push_to_hub(commit_message="Training complete")

Saving model checkpoint to peft-adapter-jul
Configuration saved in peft-adapter-jul/adapter_jul/adapter_config.json
Module weights saved in peft-adapter-jul/adapter_jul/pytorch_adapter.bin
Configuration saved in peft-adapter-jul/adapter_jul/head_config.json
Module weights saved in peft-adapter-jul/adapter_jul/pytorch_model_head.bin
tokenizer config file saved in peft-adapter-jul/tokenizer_config.json
Special tokens file saved in peft-adapter-jul/special_tokens_map.json


Upload file adapter_jul/pytorch_adapter.bin:   0%|          | 32.0k/9.11M [00:00<?, ?B/s]

Upload file runs/May13_17-11-10_08c80963a5b7/events.out.tfevents.1683998219.08c80963a5b7.521.28: 100%|########…

Upload file adapter_jul/pytorch_model_head.bin: 100%|##########| 16.1k/16.1k [00:00<?, ?B/s]

Upload file runs/May13_17-11-10_08c80963a5b7/events.out.tfevents.1683997876.08c80963a5b7.521.24: 100%|########…

Upload file runs/May13_17-11-10_08c80963a5b7/events.out.tfevents.1683998219.08c80963a5b7.521.29: 100%|########…

Upload file runs/May13_17-11-10_08c80963a5b7/events.out.tfevents.1683997876.08c80963a5b7.521.26: 100%|########…

To https://huggingface.co/fgiauna/peft-adapter-jul
   a8e246f..d968ac2  main -> main

   a8e246f..d968ac2  main -> main

Dropping the following result as it does not have all the necessary fields:
{'task': {'name': 'Token Classification', 'type': 'token-classification'}}
To https://huggingface.co/fgiauna/peft-adapter-jul
   d968ac2..84801e2  main -> main

   d968ac2..84801e2  main -> main



'https://huggingface.co/fgiauna/peft-adapter-jul/commit/d968ac23c8d2375830815f97fc8b77e5d6e1e410'