In [1]:
from datasets import load_from_disk
from transformers import DataCollatorForSeq2Seq, T5Tokenizer, T5ForConditionalGeneration, Seq2SeqTrainer, Seq2SeqTrainingArguments
from peft import LoraConfig, get_peft_model, TaskType

In [2]:
# 1. Dataset
# Read both pre-saved splits from disk in one pass (train first, then test)
# and show a summary of the training split.
train, test = map(load_from_disk, ('../data/dt_train', '../data/dt_test'))
print(train)

Dataset({
    features: ['input_ids', 'attention_mask', 'labels'],
    num_rows: 32520
})


In [3]:
# Checkpoint identifier shared by the tokenizer and the model.
model_id = "google/flan-t5-large"

# Tokenizer that belongs to the checkpoint above.
tokenizer = T5Tokenizer.from_pretrained(model_id)

# Pretrained seq2seq model, loaded with device_map="cuda".
model = T5ForConditionalGeneration.from_pretrained(
    model_id,
    device_map="cuda",
)

# Batch collator handed to the trainer later; built from the tokenizer and the model.
data_collator = DataCollatorForSeq2Seq(model=model, tokenizer=tokenizer)

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


In [4]:
# LoRA adapter settings for a sequence-to-sequence LM task.
lora_config = LoraConfig(
    task_type=TaskType.SEQ_2_SEQ_LM,
    target_modules=["q", "v"],  # modules that receive adapters
    r=14,
    lora_alpha=30,
    lora_dropout=0.05,
    bias="none",
)

# Wrap the model with the adapter and report how many parameters are trainable.
# NOTE: this rebinds `model`, so re-running the cell on the same kernel passes the
# already-wrapped model to get_peft_model again; re-run the model-loading cell first.
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

trainable params: 4,718,592 || all params: 787,868,672 || trainable%: 0.5989


In [5]:
# Directory that receives checkpoints and (under logs/) the TensorBoard event files.
output_dir = "../data/res"

# Step cadence. The previous value of 2 for logging, evaluation and saving meant
# a full pass over the eval set plus a checkpoint write after every second
# optimizer step, which dominates the run time. Evaluation and saving share one
# value because load_best_model_at_end requires save_steps to be a round
# multiple of eval_steps.
LOGGING_STEPS = 50
EVAL_SAVE_STEPS = 500

# Define training args
training_args = Seq2SeqTrainingArguments(
    output_dir=output_dir,
    auto_find_batch_size=True,
    learning_rate=1e-3,  # higher learning rate
    num_train_epochs=5,
    # Logging
    logging_dir=f"{output_dir}/logs",
    logging_strategy="steps",
    logging_steps=LOGGING_STEPS,
    logging_first_step=True,
    report_to="tensorboard",
    # Evaluation and checkpointing (kept in lock-step, see above)
    eval_strategy="steps",
    eval_steps=EVAL_SAVE_STEPS,
    save_strategy="steps",
    save_steps=EVAL_SAVE_STEPS,
    save_total_limit=5,
    load_best_model_at_end=True,
)

# Create Trainer instance
trainer = Seq2SeqTrainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    train_dataset=train,
    eval_dataset=test,
)

In [None]:
# Start fine-tuning with the Seq2SeqTrainer instance `trainer`; as the cell's
# last expression, the value returned by train() is displayed as the cell output.
trainer.train()