# Fine-tuning of T5-small model with applying Quantization and LoRA

In [1]:
%%capture
!pip install transformers datasets evaluate rouge_score bitsandbytes accelerate

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
from datasets import load_dataset

dataset = load_dataset("ccdv/cnn_dailymail", '3.0.0', trust_remote_code=True)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md:   0%|          | 0.00/13.9k [00:00<?, ?B/s]

cnn_dailymail.py:   0%|          | 0.00/9.27k [00:00<?, ?B/s]

cnn_stories.tgz:   0%|          | 0.00/159M [00:00<?, ?B/s]

dailymail_stories.tgz:   0%|          | 0.00/376M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/2.11M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/46.4M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/2.43M [00:00<?, ?B/s]

Generating train split: 0 examples [00:00, ? examples/s]

Generating validation split: 0 examples [00:00, ? examples/s]

Generating test split: 0 examples [00:00, ? examples/s]

In [4]:
from transformers import AutoTokenizer

checkpoint = "google-t5/t5-small"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

The cache for model files in Transformers v4.22.0 has been updated. Migrating your old cache. This is a one-time only operation. You can interrupt this and resume the migration later on by calling `transformers.utils.move_cache()`.


0it [00:00, ?it/s]

tokenizer_config.json:   0%|          | 0.00/2.32k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.39M [00:00<?, ?B/s]

In [5]:
prefix = "summarize: "


def preprocess_function(examples):
    inputs = [prefix + doc for doc in examples["article"]]
    model_inputs = tokenizer(inputs)

    labels = tokenizer(text_target=examples["highlights"])

    model_inputs["labels"] = labels["input_ids"]
    return model_inputs

In [6]:
dataset_train = dataset['train'].select(range(30000))
dataset_train = dataset_train.map(preprocess_function, batched=True)
dataset_train = dataset_train.filter(lambda x: len(x["input_ids"]) <= 1024, batched=False)
dataset_train = dataset_train.filter(lambda x: len(x["labels"]) <= 512, batched=False)

Map:   0%|          | 0/30000 [00:00<?, ? examples/s]

Token indices sequence length is longer than the specified maximum sequence length for this model (2118 > 512). Running this sequence through the model will result in indexing errors


Filter:   0%|          | 0/30000 [00:00<?, ? examples/s]

Filter:   0%|          | 0/18662 [00:00<?, ? examples/s]

In [7]:
dataset_val = dataset['validation'].select(range(2000))
dataset_val = dataset_val.map(preprocess_function, batched=True)
dataset_val = dataset_val.filter(lambda x: len(x["input_ids"]) <= 1024, batched=False)
dataset_val = dataset_val.filter(lambda x: len(x["labels"]) <= 512, batched=False)

Map:   0%|          | 0/2000 [00:00<?, ? examples/s]

Filter:   0%|          | 0/2000 [00:00<?, ? examples/s]

Filter:   0%|          | 0/1267 [00:00<?, ? examples/s]

In [8]:
# size of train and val sets
print(len(dataset_train))
print(len(dataset_val))

18662
1267


In [9]:
# longest article in tokens
len(max(dataset_train['input_ids'], key=len))

1024

In [10]:
from transformers import DataCollatorForSeq2Seq

data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=checkpoint)

In [11]:
import evaluate

rouge = evaluate.load("rouge")

Downloading builder script:   0%|          | 0.00/6.27k [00:00<?, ?B/s]

In [12]:
import numpy as np


def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    decoded_preds = tokenizer.batch_decode(predictions, skip_special_tokens=True)
    labels = np.where(labels != -100, labels, tokenizer.pad_token_id)
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)

    result = rouge.compute(predictions=decoded_preds, references=decoded_labels, use_stemmer=True)

    prediction_lens = [np.count_nonzero(pred != tokenizer.pad_token_id) for pred in predictions]
    result["gen_len"] = np.mean(prediction_lens)

    return {k: round(v, 4) for k, v in result.items()}

In [13]:
from transformers import AutoModelForSeq2SeqLM, Seq2SeqTrainingArguments, Seq2SeqTrainer, BitsAndBytesConfig
import torch

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,  # 4-bit quantization
    bnb_4bit_quant_type='nf4',  # Normalized float 4
    bnb_4bit_use_double_quant=True,  # Second quantization after the first
    bnb_4bit_compute_dtype=torch.bfloat16  # Computation type
)

model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint, quantization_config=bnb_config)
model.config.max_length = 256

config.json:   0%|          | 0.00/1.21k [00:00<?, ?B/s]

`low_cpu_mem_usage` was None, now default to True since model is quantized.


model.safetensors:   0%|          | 0.00/242M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

In [14]:
from peft import LoraConfig, get_peft_model, TaskType

lora_config = LoraConfig(
    r=1,
    lora_alpha=1,
    target_modules=["q", "v"],  # Targeting 'q' and 'v' modules
    lora_dropout=0.05,
    bias="none",
    task_type=TaskType.SEQ_2_SEQ_LM  # For T5, use SEQ_2_SEQ_LM
)

model = get_peft_model(model, lora_config)

In [15]:
def print_trainable_parameters(model):
    """
    Prints the number of trainable parameters in the model.
    """
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        all_param += param.numel()
        if param.requires_grad:
            trainable_params += param.numel()
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {100 * trainable_params / all_param}"
    )

In [16]:
print_trainable_parameters(model)

trainable params: 36864 || all params: 44814848 || trainable%: 0.0822584514846508


In [17]:
training_args = Seq2SeqTrainingArguments(
    output_dir="/content/drive/MyDrive/t5-small-finetuned-cnn_dailymail_optimized2",
    eval_strategy="steps",
    learning_rate=2e-5,
    per_device_train_batch_size=32,
    gradient_accumulation_steps=8,
    per_device_eval_batch_size=32,
    weight_decay=0.01,
    save_total_limit=3,
    eval_steps=10,
    logging_steps=10,
    save_steps=10,
    num_train_epochs=1,
    generation_max_length=256,
    predict_with_generate=True,
    fp16=True, #change to bf16=True for XPU
    report_to="none",
)

trainer = Seq2SeqTrainer(
    model=model,
    args=training_args,
    train_dataset=dataset_train,
    eval_dataset=dataset_val,
    processing_class=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)


trainer.train()

Passing a tuple of `past_key_values` is deprecated and will be removed in Transformers v4.48.0. You should pass an instance of `EncoderDecoderCache` instead, e.g. `past_key_values=EncoderDecoderCache.from_legacy_cache(past_key_values)`.


Step,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum,Gen Len
10,2.3393,2.325108,0.3361,0.135,0.2458,0.2457,59.3386
20,2.3661,2.325001,0.3365,0.135,0.2452,0.2451,59.3291
30,2.3504,2.324922,0.336,0.1344,0.2455,0.2453,59.4049
40,2.3723,2.32481,0.3361,0.1353,0.2458,0.2457,59.5249
50,2.3428,2.324699,0.3365,0.1354,0.2459,0.2457,59.3812
60,2.3376,2.32466,0.3357,0.1345,0.2455,0.2453,59.3567
70,2.3466,2.324572,0.336,0.1349,0.2458,0.2456,59.3931




TrainOutput(global_step=73, training_loss=2.3518745605259728, metrics={'train_runtime': 1029.2196, 'train_samples_per_second': 18.132, 'train_steps_per_second': 0.071, 'total_flos': 4924958224711680.0, 'train_loss': 2.3518745605259728, 'epoch': 1.0})

In [18]:
model.save_pretrained("/content/drive/MyDrive/t5-small-finetuned-cnn_dailymail_optimized2")
tokenizer.save_pretrained("/content/drive/MyDrive/t5-small-finetuned-cnn_dailymail_optimized2")

('/content/drive/MyDrive/t5-small-finetuned-cnn_dailymail_optimized2/tokenizer_config.json',
 '/content/drive/MyDrive/t5-small-finetuned-cnn_dailymail_optimized2/special_tokens_map.json',
 '/content/drive/MyDrive/t5-small-finetuned-cnn_dailymail_optimized2/spiece.model',
 '/content/drive/MyDrive/t5-small-finetuned-cnn_dailymail_optimized2/added_tokens.json',
 '/content/drive/MyDrive/t5-small-finetuned-cnn_dailymail_optimized2/tokenizer.json')