# Summarization

In [None]:
! pip install bitsandbytes transformers peft accelerate
! pip install datasets trl ninja packaging
! pip install evaluate rouge_score



In [1]:
import torch
import os
import sys
import json
import IPython
from datetime import datetime
from datasets import load_dataset
from peft import LoraConfig, PeftModel, prepare_model_for_kbit_training, get_peft_model
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    AutoTokenizer,
    TrainingArguments,
)
from trl import SFTTrainer





In [2]:
from huggingface_hub import notebook_login

# Show the Hugging Face login widget (rendered as the VBox output of this cell).
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

## Load BillSum dataset

Start by loading the smaller California state bill subset of the BillSum dataset from the 🤗 Datasets library:

In [3]:
from datasets import load_dataset

# Load only the "ca_test" split of the "billsum" dataset.
billsum = load_dataset("billsum", split="ca_test")

README.md:   0%|          | 0.00/7.27k [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


Split the dataset into a train and test set with the [train_test_split](https://huggingface.co/docs/datasets/main/en/package_reference/main_classes#datasets.Dataset.train_test_split) method:

In [4]:
# Hold out 20% of the examples for evaluation. The fixed seed makes the random
# split identical after every kernel restart, so metrics are comparable between runs.
billsum = billsum.train_test_split(test_size=0.2, seed=42)

Then take a look at an example:

In [5]:
billsum["train"][0]

{'text': 'The people of the State of California do enact as follows:\n\n\nSECTION 1.\nSection 22602 is added to the Financial Code, to read:\n22602.\n(a) A licensee that is a finance lender may pay compensation to a person that is not licensed pursuant to this division in connection with the referral of one or more prospective borrowers to the licensee, when all of the following conditions are met:\n(1) The referral by the unlicensed person leads to the consummation of a commercial loan, as defined in Section 22502, between the licensee and the prospective borrower referred by the unlicensed person.\n(2) The loan contract provides for an annual percentage rate that does not exceed 36 percent.\n(3) Before approving the loan, the licensee does both of the following:\n(A) Obtains documentation from the prospective borrower documenting the borrower’s commercial status. Examples of acceptable forms of documentation include, but are not limited to, a seller’s permit, business license, articl

There are two fields that you'll want to use:

- `text`: the text of the bill, which will be the input to the model.
- `summary`: a condensed version of `text`, which will be the model target.

## Preprocess

The next step is to load a T5 tokenizer to process `text` and `summary`:

In [6]:
from transformers import AutoTokenizer

# Name of the pretrained checkpoint; reused later when the model itself is loaded.
checkpoint = "t5-3b"
# Tokenizer matching that checkpoint.
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

The preprocessing function you want to create needs to:

1. Prefix the input with a prompt so T5 knows this is a summarization task. Some models capable of multiple NLP tasks require prompting for specific tasks.
2. Use the keyword `text_target` argument when tokenizing labels.
3. Truncate sequences to be no longer than the maximum length set by the `max_length` parameter.

In [7]:
prefix = "summarize: "


def preprocess_function(examples):
    """Tokenize a batch of bills and their summaries for seq2seq training.

    Every entry of ``examples["text"]`` is prepended with ``prefix`` and tokenized
    (truncated to 1024 tokens). Every entry of ``examples["summary"]`` is tokenized
    as a target (truncated to 128 tokens) and its ids are stored under ``"labels"``.

    Returns the tokenizer output for the inputs, extended with the ``"labels"`` key.
    """
    prompted_docs = []
    for document in examples["text"]:
        prompted_docs.append(prefix + document)

    model_inputs = tokenizer(prompted_docs, truncation=True, max_length=1024)
    target_encoding = tokenizer(
        text_target=examples["summary"],
        truncation=True,
        max_length=128,
    )
    model_inputs["labels"] = target_encoding["input_ids"]
    return model_inputs

In [8]:
# Apply the preprocessing to both splits; batched=True hands preprocess_function a batch of examples per call.
tokenized_billsum = billsum.map(preprocess_function, batched=True)

Map:   0%|          | 0/989 [00:00<?, ? examples/s]

Map:   0%|          | 0/248 [00:00<?, ? examples/s]

Now create a batch of examples using [DataCollatorForSeq2Seq](https://huggingface.co/docs/transformers/main/en/main_classes/data_collator#transformers.DataCollatorForSeq2Seq). It's more efficient to *dynamically pad* the sentences to the longest length in a batch during collation, instead of padding the whole dataset to the maximum length.

In [9]:
from transformers import DataCollatorForSeq2Seq

# NOTE(review): `model` receives the checkpoint name (the string "t5-3b"), not a model
# object — confirm this is intended, since the model is only loaded further down.
data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=checkpoint)

## Evaluate

In [10]:
import evaluate

# ROUGE metric object; its `compute` method is called from `compute_metrics`.
rouge = evaluate.load("rouge")

Then create a function that passes your predictions and labels to [compute](https://huggingface.co/docs/evaluate/main/en/package_reference/main_classes#evaluate.EvaluationModule.compute) to calculate the ROUGE metric:

In [11]:
import numpy as np


def compute_metrics(eval_pred):
    """Compute ROUGE scores and the mean generated length for an evaluation pass.

    Args:
        eval_pred: pair ``(predictions, labels)`` of token-id arrays. ``predictions``
            may also be a tuple whose first element holds the token ids.

    Returns:
        dict mapping every key returned by ``rouge.compute`` plus ``"gen_len"``
        (mean number of non-padding tokens per prediction) to a value rounded
        to 4 decimals.
    """
    predictions, labels = eval_pred
    # Keep only the token ids when extra outputs are returned alongside them.
    if isinstance(predictions, tuple):
        predictions = predictions[0]

    # Batches of different lengths are padded with -100 when they are gathered.
    # -100 is not a valid token id, so it must be replaced before decoding; this
    # applies to the predictions as much as to the labels.
    predictions = np.where(predictions != -100, predictions, tokenizer.pad_token_id)
    labels = np.where(labels != -100, labels, tokenizer.pad_token_id)

    decoded_preds = tokenizer.batch_decode(predictions, skip_special_tokens=True)
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)

    result = rouge.compute(predictions=decoded_preds, references=decoded_labels, use_stemmer=True)

    # Count real tokens only; computed on the cleaned ids so -100 padding is not counted.
    prediction_lens = [np.count_nonzero(pred != tokenizer.pad_token_id) for pred in predictions]
    result["gen_len"] = np.mean(prediction_lens)

    return {k: round(v, 4) for k, v in result.items()}

Your `compute_metrics` function is ready to go now, and you'll return to it when you set up your training.

## Train

Next, we generate the quantization parameters by initializing the model with 4 bits, employing the NF4 format (4-bit NormalFloat - NF4), a new data type ideal for normally distributed weights, and implementing double quantization to achieve additional memory conservation.

In [12]:
# Quantization (see https://huggingface.co/docs/optimum/concept_guides/quantization)
# shrinks the model so that it fits on a single GPU.
# 4-bit NF4 weights, double quantization, float16 compute.
compute_dtype = torch.float16
print(compute_dtype)
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=compute_dtype,
)

torch.float16


<Tip>

If you aren't familiar with finetuning a model with the [Trainer](https://huggingface.co/docs/transformers/main/en/main_classes/trainer#transformers.Trainer), take a look at the basic tutorial [here](https://huggingface.co/docs/transformers/main/en/tasks/../training#train-with-pytorch-trainer)!

</Tip>

You're ready to start training your model now! Load T5 with [AutoModelForSeq2SeqLM](https://huggingface.co/docs/transformers/main/en/model_doc/auto#transformers.AutoModelForSeq2SeqLM):

In [None]:
# from transformers import AutoModelForSeq2SeqLM, Seq2SeqTrainingArguments, Seq2SeqTrainer

# model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)

# model.get_memory_footprint()

In [None]:
# print(model)

In [13]:
from transformers import AutoModelForSeq2SeqLM, Seq2SeqTrainingArguments, Seq2SeqTrainer

# Load the checkpoint with the quantization settings from `bnb_config`; the device map
# {"": 0} assigns the whole model to device 0.
model_q = AutoModelForSeq2SeqLM.from_pretrained(checkpoint,quantization_config=bnb_config, device_map={"": 0}) #device_map="auto" will cause a problem in the training

# Last expression of the cell: displays the memory footprint of the loaded model.
model_q.get_memory_footprint()

model.safetensors:   0%|          | 0.00/11.4G [00:00<?, ?B/s]

Error while downloading from https://cdn-lfs.hf.co/t5-3b/a0e6c24ae12dd1c39e454fede314b20311c9e83e9e785b0141e8586010ef7766?response-content-disposition=inline%3B+filename*%3DUTF-8%27%27model.safetensors%3B+filename%3D%22model.safetensors%22%3B&Expires=1742790365&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTc0Mjc5MDM2NX19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy5oZi5jby90NS0zYi9hMGU2YzI0YWUxMmRkMWMzOWU0NTRmZWRlMzE0YjIwMzExYzllODNlOWU3ODViMDE0MWU4NTg2MDEwZWY3NzY2P3Jlc3BvbnNlLWNvbnRlbnQtZGlzcG9zaXRpb249KiJ9XX0_&Signature=OXXCp4-Reo6NLf8csb79kdfx8vO4RrrgxQpuE72lna0DYBS9zH9YQWWZbEbnILhVR-DSb8tD%7ErpkFEh7qElRHFfRlu8EMPaUg4eMBZr8109Wl1clbXxBwSTJFQXCOuBCl3tRi4%7EkXef6Ej-yHAJ63wx1MN4BHIuzn98Hq0eIqs54Nofbze5ysrpCK%7EBYZCoOkx5RmzCqKwJMXdki3T6kKRilQKbe7gZgs3mKl%7Ed7fG1S4IC5upE26XEeABaHptGyfiebwxiSLzcVF8rrmwUjHyzs0%7Ey6%7EqKQSuqHUHirSI%7EKliBj0Kc-D2JGDbn7Xwiw7P8s8-JOv7CGkOm6Ymxy7Q__&Key-Pair-Id=K3RPWS32NSSJCE: HTTPSConnectionPool(host='cdn-lfs.hf.co', port=44

ConnectionError: (MaxRetryError('HTTPSConnectionPool(host=\'cdn-lfs.hf.co\', port=443): Max retries exceeded with url: /t5-3b/a0e6c24ae12dd1c39e454fede314b20311c9e83e9e785b0141e8586010ef7766?response-content-disposition=inline%3B+filename*%3DUTF-8%27%27model.safetensors%3B+filename%3D%22model.safetensors%22%3B&Expires=1742790365&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTc0Mjc5MDM2NX19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy5oZi5jby90NS0zYi9hMGU2YzI0YWUxMmRkMWMzOWU0NTRmZWRlMzE0YjIwMzExYzllODNlOWU3ODViMDE0MWU4NTg2MDEwZWY3NzY2P3Jlc3BvbnNlLWNvbnRlbnQtZGlzcG9zaXRpb249KiJ9XX0_&Signature=OXXCp4-Reo6NLf8csb79kdfx8vO4RrrgxQpuE72lna0DYBS9zH9YQWWZbEbnILhVR-DSb8tD~rpkFEh7qElRHFfRlu8EMPaUg4eMBZr8109Wl1clbXxBwSTJFQXCOuBCl3tRi4~kXef6Ej-yHAJ63wx1MN4BHIuzn98Hq0eIqs54Nofbze5ysrpCK~BYZCoOkx5RmzCqKwJMXdki3T6kKRilQKbe7gZgs3mKl~d7fG1S4IC5upE26XEeABaHptGyfiebwxiSLzcVF8rrmwUjHyzs0~y6~qKQSuqHUHirSI~KliBj0Kc-D2JGDbn7Xwiw7P8s8-JOv7CGkOm6Ymxy7Q__&Key-Pair-Id=K3RPWS32NSSJCE (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x0000025533BF73A0>: Failed to resolve \'cdn-lfs.hf.co\' ([Errno 11001] getaddrinfo failed)"))'), '(Request ID: 3bb30231-e82b-4e58-8ca5-3f456e6c6d70)')

In [None]:
print(model_q)
# In the printed structure most linear layers are Linear4bit; note that the
# feed-forward `wo` projection is still a regular Linear.

T5ForConditionalGeneration(
  (shared): Embedding(32128, 1024)
  (encoder): T5Stack(
    (embed_tokens): Embedding(32128, 1024)
    (block): ModuleList(
      (0): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear4bit(in_features=1024, out_features=4096, bias=False)
              (k): Linear4bit(in_features=1024, out_features=4096, bias=False)
              (v): Linear4bit(in_features=1024, out_features=4096, bias=False)
              (o): Linear4bit(in_features=4096, out_features=1024, bias=False)
              (relative_attention_bias): Embedding(32, 32)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerFF(
            (DenseReluDense): T5DenseActDense(
              (wi): Linear4bit(in_features=1024, out_features=16384, bias=False)
              (wo): Linear(in_features=16384, out_features=1024, bias=F

We also observe the names of the different layers/modules of the model (SelfAttention, DenseReluDense, etc.). Next, we define the LoRA hyperparameters, such as the rank `r` of the adapter matrices. The higher this rank, the greater the number of weights in the low-rank matrices. In our case, we set it to 32, but you can increase it if the performance is not satisfactory, or decrease it to reduce the number of trainable weights and the memory footprint of the optimizer state associated with each weight. The dropout rate is the fraction of adapter inputs that are randomly set to 0 during training, which makes the network more robust and helps prevent overfitting.

In [None]:
# LoRA adapter configuration for the quantized sequence-to-sequence model.
peft_config = LoraConfig(
        lora_alpha=16,  # scaling factor of the adapter update
        lora_dropout=0.05,  # dropout applied inside the adapter
        r=32,  # rank of the adapter matrices
        bias="none",
        task_type="SEQ_2_SEQ_LM",
        target_modules= ['v', 'o'],  # modules named `v` and `o` (attention projections in the printed model) get adapters
        modules_to_save=["lm_head"],  # besides the adapters, `lm_head` is kept trainable and saved
)

In [None]:
# Prepare the quantized model for training before the adapter is attached.
#Cast some modules of the model to fp32
model_q = prepare_model_for_kbit_training(model_q)
#Configure the pad token in the model
model_q.config.pad_token_id = tokenizer.pad_token_id
model_q.config.use_cache = False # Gradient checkpointing is used by default but not compatible with caching

In [None]:
# Attach the LoRA adapter described by `peft_config` under the name "adapter_4".
model_q.add_adapter(peft_config, adapter_name="adapter_4")

In [None]:
model_q.set_adapter("adapter_4")

In [None]:
def print_trainable_parameters(model):
    """
    Report how many of the model's parameters will receive gradients.

    Prints the number of elements in parameters with ``requires_grad`` set, the
    total returned by ``model.num_parameters()``, and the former as a percentage
    of the latter.
    """
    all_param = model.num_parameters()
    trainable_params = sum(
        tensor.numel() for _, tensor in model.named_parameters() if tensor.requires_grad
    )
    percentage = 100 * trainable_params / all_param
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {percentage}"
    )
print_trainable_parameters(model_q)

trainable params: 56492032 || all params: 2908090368 || trainable%: 1.9425817237877527


In [None]:
print(model_q)

T5ForConditionalGeneration(
  (shared): Embedding(32128, 1024)
  (encoder): T5Stack(
    (embed_tokens): Embedding(32128, 1024)
    (block): ModuleList(
      (0): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear4bit(in_features=1024, out_features=4096, bias=False)
              (k): Linear4bit(in_features=1024, out_features=4096, bias=False)
              (v): lora.Linear4bit(
                (base_layer): Linear4bit(in_features=1024, out_features=4096, bias=False)
                (lora_dropout): ModuleDict(
                  (adapter_4): Dropout(p=0.05, inplace=False)
                )
                (lora_A): ModuleDict(
                  (adapter_4): Linear(in_features=1024, out_features=32, bias=False)
                )
                (lora_B): ModuleDict(
                  (adapter_4): Linear(in_features=32, out_features=4096, bias=False)
                )
                (lora_embeddi

In [None]:
# print(model)

At this point, only three steps remain:

1. Define your training hyperparameters in [Seq2SeqTrainingArguments](https://huggingface.co/docs/transformers/main/en/main_classes/trainer#transformers.Seq2SeqTrainingArguments). The only required parameter is `output_dir` which specifies where to save your model. At the end of each epoch, the [Trainer](https://huggingface.co/docs/transformers/main/en/main_classes/trainer#transformers.Trainer) will evaluate the ROUGE metric and save the training checkpoint.
2. Pass the training arguments to [Seq2SeqTrainer](https://huggingface.co/docs/transformers/main/en/main_classes/trainer#transformers.Seq2SeqTrainer) along with the model, dataset, tokenizer, data collator, and `compute_metrics` function.
3. Call [train()](https://huggingface.co/docs/transformers/main/en/main_classes/trainer#transformers.Trainer.train) to finetune your model.

In [None]:

# Training configuration: evaluate once per epoch and generate summaries during
# evaluation so that `compute_metrics` receives generated token ids.
training_args = Seq2SeqTrainingArguments(
    output_dir="my_awesome_billsum_model",
    # NOTE(review): newer transformers releases name this argument `eval_strategy` —
    # confirm against the installed version.
    evaluation_strategy="epoch",
    optim="paged_adamw_8bit", #used with QLoRA
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    weight_decay=0.01,
    save_total_limit=3,
    learning_rate=2e-5,
    num_train_epochs=4,
    predict_with_generate=True,
    # NOTE(review): the training log recorded below has an empty "Validation Loss"
    # column — check whether fp16 is numerically stable for this model.
    fp16=True,
    #push_to_hub=True,
)

# Trainer wired to the quantized model with the LoRA adapter, the tokenized BillSum
# splits, the dynamic-padding collator and the ROUGE metric function.
trainer = Seq2SeqTrainer(
    model=model_q,
    args=training_args,
    train_dataset=tokenized_billsum["train"],
    eval_dataset=tokenized_billsum["test"],
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

In [None]:
trainer.train()

Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum,Gen Len
1,No log,,0.1313,0.0443,0.108,0.1077,19.0


## Inference

Great, now that you've finetuned a model, you can use it for inference!

Come up with some text you'd like to summarize. For T5, you need to prefix your input depending on the task you're working on. For summarization you should prefix your input as shown below:

In [None]:
text = "summarize: The Inflation Reduction Act lowers prescription drug costs, health care costs, and energy costs. It's the most aggressive action on tackling the climate crisis in American history, which will lift up American workers and create good-paying, union jobs across the country. It'll lower the deficit and ask the ultra-wealthy and corporations to pay their fair share. And no one making under $400,000 per year will pay a penny more in taxes."

Tokenize the text and return the `input_ids` as PyTorch tensors:

In [None]:
from transformers import AutoTokenizer

# Use the tokenizer of the checkpoint that was fine-tuned above. Nothing in this
# notebook uploads a tokenizer to the Hub, so loading one from an unrelated
# third-party repository only adds an external dependency that can silently drift.
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
# Keep only the token ids, returned as a PyTorch tensor.
inputs = tokenizer(text, return_tensors="pt").input_ids

Use the [generate()](https://huggingface.co/docs/transformers/main/en/main_classes/text_generation#transformers.GenerationMixin.generate) method to create the summarization. For more details about the different text generation strategies and parameters for controlling generation, check out the [Text Generation](https://huggingface.co/docs/transformers/main/en/tasks/../main_classes/text_generation) API.

In [None]:
from transformers import AutoModelForSeq2SeqLM

model = model_q
# Make sure dropout is disabled while generating.
model.eval()
# The tokenizer returns CPU tensors, while the quantized model was placed on a GPU
# when it was loaded; the input ids must live on the model's device.
outputs = model.generate(inputs.to(model.device), max_new_tokens=100, do_sample=False)

Decode the generated token ids back into text:

In [None]:
tokenizer.decode(outputs[0], skip_special_tokens=True)

# Fine-tune google/flan-t5-large with the SAMSum dataset

In [None]:
import pandas as pd
import numpy as np 
import torch
from tqdm import tqdm
from random import randrange
import plotly.express as px
import gc
import os
import wandb
from kaggle_secrets import UserSecretsClient

from datasets import load_dataset,concatenate_datasets
from transformers import (AutoTokenizer, AutoModelForSeq2SeqLM,DataCollatorForSeq2Seq,
                          BitsAndBytesConfig,Seq2SeqTrainingArguments,Seq2SeqTrainer)
from peft import LoraConfig, get_peft_model,prepare_model_for_kbit_training , TaskType
import evaluate

from accelerate import Accelerator
# Accelerator instance; it is not referenced again in the cells visible in this notebook.
accelerator = Accelerator()
# Kaggle secrets client; `cVariables.Tokens` reads the "huggingface" and "wandb" secrets from it.
user_secrets = UserSecretsClient()

In [None]:
class cVariables:
    """Singleton holding the run configuration and the data-slicing state.

    Class attributes:
        Paths: dataset id, base model id, output checkpoint name and W&B names.
        Hayperparameters: sequence lengths, batch sizes, epochs, learning rate and
            weight decay (the spelling is kept because other cells index it).
        Tokens: Hugging Face and W&B secrets, fetched from the module-level
            ``user_secrets`` client when the class body is executed.
        NUM_SLICES / TRAIN_TOTAL / EVAL_TOTAL: number of equally sized slices and
            the row counts the slice bounds are derived from.

    Instance attributes:
        ATTEMPT_NO: index (0-6) of the slice to train/evaluate on. The data is
            processed in pieces because of limited GPU resources; values 0-5
            select the six equally sized slices, 6 selects the leftover rows.
    """

    __shared_instance = None

    NUM_SLICES = 6
    TRAIN_TOTAL = 14732  # row count used as the upper bound of the train slices
    EVAL_TOTAL = 818     # row count used as the upper bound of the eval slices

    @staticmethod
    def get_instance():
        """Return the single instance, creating it on first use."""
        if cVariables.__shared_instance is None:
            cVariables()
        # The attribute must be qualified with the class: a bare `__shared_instance`
        # is looked up as a (name-mangled) global and raises NameError.
        return cVariables.__shared_instance

    def __init__(self):
        """Register the instance as the singleton.

        Raises:
            Exception: if an instance has already been created.
        """
        if cVariables.__shared_instance is not None:
            raise Exception("This class is a singleton class !")
        cVariables.__shared_instance = self
        #----------------------
        # Selects which slice of the data get_SizeSampleTrain / get_SizeSampleEval return.
        self.ATTEMPT_NO = 0
        #----------------------

    def _slice_bounds(self, total):
        """Return ``(start, end)`` of the slice selected by ``ATTEMPT_NO`` within ``total`` rows."""
        assert self.ATTEMPT_NO < 7 , "ATTEMPT_NO should be less than 7"
        slice_size = total // cVariables.NUM_SLICES
        start = self.ATTEMPT_NO * slice_size
        # Clamp so the last attempt covers only the leftover rows instead of
        # running past the end of the split.
        return start, min(start + slice_size, total)

    def get_SizeSampleTrain(self):
        """Return the ``(start, end)`` row bounds of the current training slice."""
        return self._slice_bounds(cVariables.TRAIN_TOTAL)

    def get_SizeSampleEval(self):
        """Return the ``(start, end)`` row bounds of the current evaluation slice."""
        return self._slice_bounds(cVariables.EVAL_TOTAL)

    Paths={
    'data' : 'samsum',
    'model': 'google/flan-t5-large',
    'new_checkpoint': f'FlanT5Summarization-samsum',
    'wandb_proj': 'Summarization by Finetuning FlanT5-LoRA',
    'wandb_run':f'flant5Summarization',
    }
    Hayperparameters={
     'max_source_length':512,
     'max_target_length':128,
     'batch_size_train':128,
     'batch_size_eval':64,
     'epochs':3,
     'lr':3e-5,
     'l2':0.01,
    }
    Tokens={'huggingface' :user_secrets.get_secret("huggingface"),
            'wandb': user_secrets.get_secret("wandb")}
var = cVariables()

In [None]:
def clear_gpu():
    """Run the garbage collector, release cached CUDA memory, then collect again.

    The number of objects found by each garbage-collection pass is printed.
    """
    collected_before = gc.collect()
    print(collected_before)
    torch.cuda.empty_cache()
    collected_after = gc.collect()
    print(collected_after)

In [None]:
rouge = evaluate.load("rouge")

def compute_metrics(pred):
    """Compute ROUGE scores for one evaluation pass.

    Args:
        pred: object with ``predictions`` (token ids, logits, or a tuple whose first
            element is one of those) and ``label_ids`` (token ids, with -100 marking
            positions to ignore).

    Returns:
        dict with the keys ``rouge1``, ``rouge2``, ``rougeL`` and ``rougeLsum``.
    """
    labels_ids = pred.label_ids
    pred_ids = pred.predictions

    # Ensure the predictions and labels are in the correct format
    if isinstance(pred_ids, tuple):
        pred_ids = pred_ids[0]

    # A 3-dimensional array is reduced over its last axis to token ids;
    # anything else is used as it is.
    pred_ids = np.argmax(pred_ids, axis=-1) if pred_ids.ndim == 3 else pred_ids

    # Convert to lists and drop the -100 filler. Predictions need this as much as
    # labels: sequences of different length are padded with -100 when they are
    # gathered, and -100 is not a token id the tokenizer can decode.
    pred_ids = [[token for token in row if token != -100] for row in pred_ids.tolist()]
    labels_ids = [[token for token in row if token != -100] for row in labels_ids.tolist()]

    # Decode generated summaries and labels (converting token IDs back to text)
    pred_str = tokenizer.batch_decode(pred_ids, skip_special_tokens=True)
    label_str = tokenizer.batch_decode(labels_ids, skip_special_tokens=True)

    # Compute ROUGE scores
    rouge_output = rouge.compute(predictions=pred_str, references=label_str)

    return {
        "rouge1": rouge_output["rouge1"],
        "rouge2": rouge_output["rouge2"],
        "rougeL": rouge_output["rougeL"],
        "rougeLsum": rouge_output["rougeLsum"],
    }

In [None]:
tokenizer = AutoTokenizer.from_pretrained(var.Paths['model'],token=var.Tokens['huggingface'])

In [None]:
def process_dataset(data):
    """Tokenize a batch of dialogues and their reference summaries.

    Args:
        data: batch with the keys ``"dialogue"`` and ``"summary"`` (lists of strings).

    Returns:
        The tokenizer output for the prompted dialogues (padded/truncated to
        ``max_source_length``) with an added ``"labels"`` entry: the token ids of
        the summaries (padded/truncated to ``max_target_length``) in which every
        padding id is replaced by -100.
    """
    inputs = ["summarize: " + item for item in data["dialogue"]]

    model_inputs = tokenizer(inputs,add_special_tokens=True,
                max_length=var.Hayperparameters['max_source_length'],
                padding='max_length',
                truncation=True,
                return_tensors='pt')
    # The targets are the reference summaries, not the dialogues: tokenizing
    # `inputs` here would train the model to copy its own input.
    # Plain Python lists are requested so the padding ids can be rewritten below.
    model_target = tokenizer(text_target=data["summary"],add_special_tokens=True,
                max_length=var.Hayperparameters['max_target_length'],
                padding='max_length',
                truncation=True)
    # Padding positions are set to -100 so they can be ignored by the loss.
    labels = [
        [(token if token != tokenizer.pad_token_id else -100) for token in sequence]
        for sequence in model_target["input_ids"]
    ]

    model_inputs["labels"] = labels
    return model_inputs

In [None]:
# Configure Weights & Biases through environment variables, then start the run.
os.environ["WANDB_API_KEY"] = var.Tokens['wandb']
os.environ["WANDB_DEBUG"] = "true"
os.environ["WANDB_PROJECT"]=var.Paths['wandb_proj']
# NOTE(review): the run name is taken from 'new_checkpoint'; `Paths` also defines
# 'wandb_run', which is not used here — confirm which one is intended.
os.environ["WANDB_NAME"] = var.Paths['new_checkpoint']

wandb.init()

In [None]:
dataset = load_dataset(var.Paths['data'], trust_remote_code=True)

In [None]:
# Tokenize every split in batches and drop the raw "dialogue", "summary" and "id" columns.
tokenized_dataset = dataset.map(process_dataset, batched=True, remove_columns=["dialogue", "summary", "id"])
print(f"Keys of tokenized dataset: {list(tokenized_dataset['train'].features)}")

In [None]:
# Slice the mapped datasets to get the smaller samples
start_train, end_train = var.get_SizeSampleTrain()
start_eval, end_eval = var.get_SizeSampleEval()

train_dataset = tokenized_dataset['train'].select(range(start_train,end_train))
validation_dataset = tokenized_dataset['validation'].select(range(start_eval,end_eval))

In [None]:
# 4-bit loading; no other quantization option is set here.
quantization_config = BitsAndBytesConfig(load_in_4bit=True,)

# Load the base model named in `var.Paths['model']` with that quantization config.
model = AutoModelForSeq2SeqLM.from_pretrained(var.Paths['model'],
                                            quantization_config=quantization_config,
                                            device_map="auto",
                                            token=var.Tokens['huggingface'])
clear_gpu()

In [None]:
# Define LoRA Config
lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=["q", "v"],

    lora_dropout=0.05,
    bias="none",
    task_type=TaskType.SEQ_2_SEQ_LM
)

# prepare int-8 model for training
model = prepare_model_for_kbit_training(model)

# # add LoRA adaptor
model = get_peft_model(model, lora_config)

In [None]:
# we want to ignore tokenizer pad token in the loss
label_pad_token_id = -100
# Data collator
data_collator = DataCollatorForSeq2Seq(
    tokenizer,
    model=model,
    label_pad_token_id=label_pad_token_id,
    pad_to_multiple_of=8
)

In [None]:
# Training configuration; values come from `var.Paths` and `var.Hayperparameters`.
training_args = Seq2SeqTrainingArguments(
    output_dir=var.Paths['new_checkpoint'],
    num_train_epochs=var.Hayperparameters['epochs'],
    
    # Evaluate and log every 5 steps; checkpoints are also saved on a step schedule.
    # NOTE(review): newer transformers releases name the first argument
    # `eval_strategy` — confirm against the installed version.
    evaluation_strategy = 'steps',
    save_strategy = 'steps',
    load_best_model_at_end = True,
    logging_steps =5,
    eval_steps = 5,
    save_total_limit =2,
    predict_with_generate=True , # For generating summaries during evaluation

    
    lr_scheduler_type = "cosine",
    learning_rate = var.Hayperparameters['lr'],
    optim="adamw_torch",
    
    # The configured batch sizes are starting values; `auto_find_batch_size` is enabled as well.
    auto_find_batch_size=True,
    per_device_train_batch_size = var.Hayperparameters['batch_size_train'],
    per_device_eval_batch_size = var.Hayperparameters['batch_size_eval'],
    weight_decay = var.Hayperparameters['l2'],
    warmup_ratio=0.1,
    gradient_accumulation_steps=4,
    
    # Push to a private Hub repository using the Hugging Face token.
    push_to_hub=True,
    hub_private_repo=True,
    hub_token=var.Tokens['huggingface'],
    run_name=var.Paths['new_checkpoint'],

    report_to=['wandb'],
)
clear_gpu()

In [None]:
# Train and evaluate on the slices selected through `var.ATTEMPT_NO`
# (`train_dataset` / `validation_dataset`). Passing the complete tokenized splits
# here would silently ignore the slicing done earlier in the notebook.
trainer = Seq2SeqTrainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    tokenizer=tokenizer,
    train_dataset=train_dataset,
    eval_dataset=validation_dataset,
    compute_metrics=compute_metrics,
)

# Disable the generation cache for training — presumably because it conflicts with
# the gradient checkpointing set up during k-bit preparation; TODO confirm.
model.config.use_cache = False
clear_gpu()

In [None]:
trainer.train()
clear_gpu()

In [None]:
trainer.save_model(output_dir="t5T1")

In [None]:
wandb.finish()

# Evaluation Model Efficiency

In [3]:
from config import REDIS_CONFIG
import redis

# Connection pooling to optimize connection handling
pool = redis.ConnectionPool(
    host=REDIS_CONFIG['host'],
    port=REDIS_CONFIG['port'],
    db=REDIS_CONFIG['db'],
    password=REDIS_CONFIG['password'],
    max_connections=100,  # At most 100 concurrent connections
    decode_responses=REDIS_CONFIG['decode_responses']
)