# Fine-Tuning Flan-T5 for NER with LoRA


In [1]:
import evaluate
import numpy as np

from transformers import (
    DataCollatorForTokenClassification,
    AutoModelForTokenClassification,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    Trainer,
    pipeline,
    set_seed,
)
from datasets import load_from_disk
from peft import (
    prepare_model_for_kbit_training,
    LoraConfig,
    get_peft_model,
    TaskType,
)

amdgpu.ids: No such file or directory


## Load Dataset

The dataset has already been preprocessed by a script and saved to disk, so we just need to load it.


In [2]:
# Read the already-preprocessed train and validation splits back from disk.
train_ds, val_ds = map(
    load_from_disk,
    ("./data/processed/train", "./data/processed/val"),
)

In [3]:
# Show the first training example to check the preprocessed schema: the raw
# transcript and its tool/heading/target fields, plus the tokenised
# input_ids, attention_mask and per-token labels.
train_ds[0]

{'key': 0,
 'transcript': 'Turret, prepare to deploy electromagnetic pulse. Heading zero six five, target is grey and white fighter jet. Engage when ready.',
 'tool': 'electromagnetic pulse',
 'heading': '065',
 'target': 'grey and white fighter jet',
 'input_ids': [3,
  2905,
  60,
  17,
  3,
  6,
  2967,
  12,
  17274,
  28641,
  13468,
  3,
  5,
  6904,
  5733,
  1296,
  874,
  3,
  6,
  2387,
  19,
  7592,
  11,
  872,
  14248,
  8757,
  3,
  5,
  4082,
  116,
  1065,
  3,
  5,
  1],
 'attention_mask': [1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1],
 'labels': [0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  3,
  4,
  0,
  0,
  0,
  5,
  6,
  6,
  0,
  0,
  0,
  0,
  1,
  2,
  2,
  2,
  2,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  -100]}

## Train Model


In [4]:
model_name = "google/flan-t5-base"

# The token-classification head is not part of the checkpoint and is randomly
# initialised on load, so fix the seed first to make runs reproducible.
SEED = 42
set_seed(SEED)

# BIO tagging scheme used by the `labels` column of the dataset.
# NOTE(review): TAR / TOOL / DIR presumably correspond to the dataset's
# `target`, `tool` and `heading` fields — confirm against the preprocessing script.
label2id = {
    "O": 0,
    "B-TAR": 1,
    "I-TAR": 2,
    "B-TOOL": 3,
    "I-TOOL": 4,
    "B-DIR": 5,
    "I-DIR": 6,
}
id2label = {v: k for k, v in label2id.items()}

# Load the base model in 8-bit and attach the label mappings to its config.
model = AutoModelForTokenClassification.from_pretrained(
    model_name,
    quantization_config=BitsAndBytesConfig(load_in_8bit=True),
    id2label=id2label,
    label2id=label2id,
)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# NOTE(review): presumably set so the Trainer does not treat the model as
# model-parallel — confirm this is still required.
model.model_parallel = False

# Batches are padded by the token-classification collator; seqeval scores
# the predicted tag sequences in compute_metrics.
data_collator = DataCollatorForTokenClassification(tokenizer=tokenizer)
metric = evaluate.load("seqeval")


def compute_metrics(eval_preds):
    """
    Score token-level predictions with the module-level seqeval ``metric``.

    Args:
        eval_preds: A ``(logits, labels)`` pair. The predicted class for each
            position is the argmax of ``logits`` over the last axis; positions
            whose label is ``-100`` are ignored.

    Returns:
        dict with the overall ``precision``, ``recall``, ``f1`` and
        ``accuracy`` reported by the metric.
    """
    logits, labels = eval_preds
    predicted_ids = np.argmax(logits, axis=-1)

    # Gold tag names, skipping the ignored (-100) positions.
    reference_tags = [
        [id2label[tag_id] for tag_id in gold_row if tag_id != -100]
        for gold_row in labels
    ]

    # Predicted tag names at exactly the positions kept above.
    predicted_tags = []
    for pred_row, gold_row in zip(predictions := predicted_ids, labels):
        kept = [
            id2label[pred_id]
            for pred_id, tag_id in zip(pred_row, gold_row)
            if tag_id != -100
        ]
        predicted_tags.append(kept)

    scores = metric.compute(
        predictions=predicted_tags, references=reference_tags
    )
    return {
        "precision": scores["overall_precision"],
        "recall": scores["overall_recall"],
        "f1": scores["overall_f1"],
        "accuracy": scores["overall_accuracy"],
    }

`low_cpu_mem_usage` was None, now set to True since model is quantized.
Some weights of T5ForTokenClassification were not initialized from the model checkpoint at google/flan-t5-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [5]:
# Run PEFT's k-bit preparation helper on the 8-bit quantized model before training.
model = prepare_model_for_kbit_training(model)

In [6]:
def print_trainable_parameters(model):
    """
    Print how many of the model's parameters are trainable.

    Walks ``model.named_parameters()`` once, totalling ``numel()`` over every
    parameter and, separately, over those with ``requires_grad`` set, then
    prints both counts and the trainable share as a percentage.

    Args:
        model: Any object exposing ``named_parameters()`` that yields
            ``(name, param)`` pairs, where each ``param`` provides ``numel()``
            and ``requires_grad``.

    Returns:
        None. The summary line is written to stdout.
    """
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        count = param.numel()
        all_param += count
        if param.requires_grad:
            trainable_params += count

    # A model without parameters would otherwise divide by zero; report 0%.
    trainable_pct = 100 * trainable_params / all_param if all_param else 0.0
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {trainable_pct}"
    )


# LoRA adapter for token classification: rank-16 adapters on the modules named
# "q" and "v" (presumably the attention query/value projections — confirm),
# with rank-stabilised scaling enabled.
# Alternative kept for reference: target_modules="all-linear", optionally
# combined with use_dora=True.
lora_config = LoraConfig(
    task_type=TaskType.TOKEN_CLS,
    target_modules=["q", "v"],
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    use_rslora=True,
    bias="none",
)

# Wrap the prepared base model with the adapters and report how much of it trains.
model = get_peft_model(model, lora_config)
print_trainable_parameters(model)

trainable params: 595207 || all params: 110229134 || trainable%: 0.5399724903944179


In [7]:
# Training configuration; checkpoints and logs go under output/flan-t5-lora-v1.
args = TrainingArguments(
    output_dir="output/flan-t5-lora-v1",
    # Optimisation
    num_train_epochs=1,
    learning_rate=1e-3,
    gradient_accumulation_steps=1,
    auto_find_batch_size=True,
    # Evaluation and checkpointing
    evaluation_strategy="epoch",
    save_steps=100,
    save_total_limit=8,
)

In [8]:
# Assemble the Trainer from the LoRA-wrapped model, both dataset splits,
# the padding collator and the seqeval-based metric function.
trainer = Trainer(
    args=args,
    model=model,
    tokenizer=tokenizer,
    data_collator=data_collator,
    train_dataset=train_ds,
    eval_dataset=val_ds,
    compute_metrics=compute_metrics,
)

In [9]:
# Run fine-tuning; with evaluation_strategy="epoch" the validation metrics
# from compute_metrics are reported at the end of each epoch.
trainer.train()

  0%|          | 0/350 [00:00<?, ?it/s]



  0%|          | 0/88 [00:00<?, ?it/s]

{'eval_loss': 0.0073816352523863316, 'eval_precision': 0.998468606431853, 'eval_recall': 0.9994890137966275, 'eval_f1': 0.9989785495403471, 'eval_accuracy': 0.9995068812480388, 'eval_runtime': 3.7527, 'eval_samples_per_second': 186.532, 'eval_steps_per_second': 23.45, 'epoch': 1.0}
{'train_runtime': 45.693, 'train_samples_per_second': 61.279, 'train_steps_per_second': 7.66, 'train_loss': 0.1364708491734096, 'epoch': 1.0}


TrainOutput(global_step=350, training_loss=0.1364708491734096, metrics={'train_runtime': 45.693, 'train_samples_per_second': 61.279, 'train_steps_per_second': 7.66, 'total_flos': 72773296203168.0, 'train_loss': 0.1364708491734096, 'epoch': 1.0})

In [10]:
# merge_and_unload() returns the base model with the LoRA weights folded in.
# Keep that return value and save it, so the checkpoint on disk is the merged
# model rather than whatever the PEFT wrapper would serialise after the merge.
# NOTE(review): the recorded run of this cell ended with
# "NotImplementedError: igemmlt not available" — presumably merging into the
# 8-bit weights is unsupported by this bitsandbytes build. Confirm, and if so
# reload the base model unquantized before merging.
merged_model = model.merge_and_unload()
merged_model.save_pretrained("./output/flan-t5-lora-v1/flan-t5-base-lora-rslora-v1.1")



NotImplementedError: igemmlt not available (probably built with NO_CUBLASLT)

In [None]:
# Sanity-check the fine-tuned model on a single hand-written command.
model.eval()
input_text = "Control here. Deploy anti-air artillery to target a silver, blue, and red helicopter heading one three zero. Engage and neutralize the threat."

# aggregation_strategy="first" makes the pipeline return grouped entities
# (one entry per entity_group) instead of one entry per token.
cls = pipeline(
    "token-classification",
    model=model,
    tokenizer=tokenizer,
    aggregation_strategy="first",
)
cls(input_text)

The model 'PeftModelForTokenClassification' is not supported for token-classification. Supported models are ['AlbertForTokenClassification', 'BertForTokenClassification', 'BigBirdForTokenClassification', 'BioGptForTokenClassification', 'BloomForTokenClassification', 'BrosForTokenClassification', 'CamembertForTokenClassification', 'CanineForTokenClassification', 'ConvBertForTokenClassification', 'Data2VecTextForTokenClassification', 'DebertaForTokenClassification', 'DebertaV2ForTokenClassification', 'DistilBertForTokenClassification', 'ElectraForTokenClassification', 'ErnieForTokenClassification', 'ErnieMForTokenClassification', 'EsmForTokenClassification', 'FalconForTokenClassification', 'FlaubertForTokenClassification', 'FNetForTokenClassification', 'FunnelForTokenClassification', 'GPT2ForTokenClassification', 'GPT2ForTokenClassification', 'GPTBigCodeForTokenClassification', 'GPTNeoForTokenClassification', 'GPTNeoXForTokenClassification', 'IBertForTokenClassification', 'LayoutLMForTok

[{'entity_group': 'TOOL',
  'score': 0.999987,
  'word': 'anti-airartillery',
  'start': 20,
  'end': 39},
 {'entity_group': 'TAR',
  'score': 0.9999998,
  'word': 'silver,blue,andredhelicopter',
  'start': 51,
  'end': 84},
 {'entity_group': 'DIR',
  'score': 1.0,
  'word': 'onethreezero.',
  'start': 92,
  'end': 108}]