# Parameter Efficient Fine Tuning(PEFT)   
This technique uses LORA i.e. Lower Rank Adaptation and prompt tuning. After fine tuning with LORA, the original LLM remains unchanged and new LORA Adapter emerges. This LORA adapter is much smaller in size and use fewer resources as compare to original LLM.

In [6]:
from datasets import load_dataset
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, GenerationConfig, TrainingArguments, Trainer
import torch
import time
import evaluate
import pandas as pd
import numpy as np

In [5]:
from peft import LoraConfig, get_peft_model, TaskType

lora_config = LoraConfig(
    r=32, # Rank
    lora_alpha=32,
    target_modules = ["q", "v"],
    lora_dropout = 0.05,
    bias = 'none',
    task_type= TaskType.SEQ_2_SEQ_LM #FLAN-T5
)

In [7]:
# Load the Model
model_name = 'google/flan-t5-base'

original_model = AutoModelForSeq2SeqLM.from_pretrained(model_name , torch_dtype = torch.bfloat16)
tokenizer = AutoTokenizer.from_pretrained(model_name)

In [None]:
def print_number_of_trainable_model_parameters(model):
    trainable_model_params = 0
    all_model_params = 0
    for _, param in model.named_parameters():
        all_model_params += param.numel()
        if param.requires_grad:
            trainable_model_params += param.numel()
    return f"trainable model parameters: {trainable_model_params}\nall model parameters: {all_model_params}\npercentage of trainable model parameters: {100 * trainable_model_params / all_model_params:.2f}%"


trainable model parameters: 3538944
all model parameters: 251116800
percentage of trainable model parameters: 1.41%


In [16]:
# Now lets Add LoRA to the Original Model

peft_model = get_peft_model(original_model,lora_config)

print("Original Model:")
print(print_number_of_trainable_model_parameters(original_model))
print("-" * 50)
print("PEFT Model:")
print(print_number_of_trainable_model_parameters(peft_model))

Original Model:
trainable model parameters: 3538944
all model parameters: 251116800
percentage of trainable model parameters: 1.41%
--------------------------------------------------
PEFT Model:
trainable model parameters: 3538944
all model parameters: 251116800
percentage of trainable model parameters: 1.41%


# Train PEFT Adapter

Lets first define the dataset and then tokenize it.

In [23]:
# Lets Load the dataset
huggingface_dataset_name = "knkarthick/dialogsum"
dataset = load_dataset(huggingface_dataset_name)
dataset

DatasetDict({
    train: Dataset({
        features: ['id', 'dialogue', 'summary', 'topic'],
        num_rows: 12460
    })
    validation: Dataset({
        features: ['id', 'dialogue', 'summary', 'topic'],
        num_rows: 500
    })
    test: Dataset({
        features: ['id', 'dialogue', 'summary', 'topic'],
        num_rows: 1500
    })
})

In [24]:
def tokenize_function(example):
    start_prompt = "Summarize the following conversation. \n\n"
    end_prompt = "\n\n Summary:"

    prompt = [start_prompt + dialogue + end_prompt for dialogue in example["dialogue"]]
    example['input_ids'] = tokenizer(prompt, truncation = True, padding = "max_length", return_tensors='pt').input_ids
    example['labels'] = tokenizer(example['summary'], truncation = True, padding='max_length', return_tensors='pt').input_ids

    return example

# The dataset actually contains 3 diff splits: train, validation, test.ipynb
# The tokenize_function code is handling all data across all splits in batches.

tokenized_datasets = dataset.map(tokenize_function, batched=True)
tokenized_datasets = tokenized_datasets.remove_columns(['id','topic','dialogue','summary',])

In [25]:
output_dir = f"./dialogue-summary-training-{str(int(time.time()))}"
peft_training_args = TrainingArguments(
    output_dir= output_dir,
    auto_find_batch_size = True,
    learning_rate = 1e-3,
    num_train_epochs = 1,
    logging_steps = 1,
    max_steps = 1
)

peft_trainer = Trainer(
    model = peft_model,
    args = peft_training_args,
    train_dataset = tokenized_datasets['train']
)

max_steps is given, it will override any value given in num_train_epochs


In [33]:
# Now everything is ready to train the PEFT Adapter model and save it

peft_trainer.train()

peft_model_path = "./peft-dialogue-summary-checkpoint-local"

peft_trainer.model.save_pretrained(peft_model_path)
tokenizer.save_pretrained(peft_model_path)


Step,Training Loss
1,47.25


('./peft-dialogue-summary-checkpoint-local\\tokenizer_config.json',
 './peft-dialogue-summary-checkpoint-local\\special_tokens_map.json',
 './peft-dialogue-summary-checkpoint-local\\tokenizer.json')

In [34]:
# Finally we will load hte save Fine tuned model

from peft import PeftModel, PeftConfig

peft_model_base = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-base",torch_dtype = torch.bfloat16)
tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-base")
peft_model = PeftModel.from_pretrained(peft_model_base,
                                        peft_model_path,
                                        torch_dtype=torch.bfloat16, 
                                        is_trainable=False
                                        )

# Evaluate the Model Qualitatively   
Now lets test the PEFT fine tuned model and see whether it is able to create a reasonable summary of the dialogure compared to the original and full fined model.

In [35]:
#load the saved model
trained_model_dir = "./trained_model"
trained_model = AutoModelForSeq2SeqLM.from_pretrained(trained_model_dir)

In [None]:
# Test the Model with Zero Shot Inferencing
index  = 200

dialogue = dataset['test'][index]['dialogue']
summary= dataset['test'][index]['summary']

prompt = f"""
Summarize the following conversation.

{dialogue}

Summary:
"""

input_ids = tokenizer(prompt,return_tensors = 'pt').input_ids

#Ensure that the input_ids and the models are on the same device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
input_ids = input_ids.to(device)
original_model.to(device)
trained_model.to(device)
peft_model.to(device)

original_model_outputs = original_model.generate(input_ids = input_ids, generation_config= GenerationConfig(max_new_tokens=200,num_beams=1))
original_model_text_output = tokenizer.decode(original_model_outputs[0],skip_special_tokens=True)
print(original_model_text_output)

instruct_model_outputs = trained_model.generate(input_ids= input_ids, generation_config = GenerationConfig(max_new_tokens = 200, num_beams=1))
instruct_model_text_output = tokenizer.decode(instruct_model_outputs[0],skip_special_tokens = True)

peft_model_outputs = peft_model.generate(input_ids = input_ids, generation_config = GenerationConfig(max_new_tokens=200, num_beams=1))
peft_model_text_output = tokenizer.decode(peft_model_outputs[0],skip_special_tokens=True)



print("-"*50)
print(f"Human Summary: \n{summary}")
print("-"*50)
print(f"Original Model Summary:\n{original_model_text_output}")
print("-"*50)
print(f"Instruct Model Output:\n {instruct_model_text_output}")
print("-"*50)
print(f"PEFT Model Output:\n {peft_model_text_output}")
print("-"*50)



#Person1#: Have you considered upgrading your system? #Person2#: Yes, but I'm not sure what to do about it. #Person1#: You could consider adding a painting program to your software. #Person2#: I'd probably need a faster processor, more memory, and a faster modem. #Person1#: You could also consider adding a CD-ROM drive. #Person2#: I'd probably need a CD-ROM drive. #Person1#: That's great.
--------------------------------------------------
Human Summary: 
#Person1# teaches #Person2# how to upgrade software and hardware in #Person2#'s system.
--------------------------------------------------
Original Model Summary:
#Person1#: Have you considered upgrading your system? #Person2#: Yes, but I'm not sure what to do about it. #Person1#: You could consider adding a painting program to your software. #Person2#: I'd probably need a faster processor, more memory, and a faster modem. #Person1#: You could also consider adding a CD-ROM drive. #Person2#: I'd probably need a CD-ROM drive. #Person1#: 

In [38]:
#Evaluate the Model Quantitatively with ROUGE Metric

dialogues = dataset['test'][0:10]['dialogue']
human_baseline_summaries = dataset['test'][0:10]['summary']
original_model_summaries = []
instruct_model_summaries = []
peft_model_summaries = []

for idx, dialogue in enumerate(dialogues):
  prompt = f"""
  summarize the following conversation
  {dialogue}
  Summary:

  """
  input_ids = tokenizer(prompt, return_tensors="pt").input_ids

  # Ensure that input_ids and the models are on the same device
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
  input_ids = input_ids.to(device)

  human_baseline_text_output = human_baseline_summaries[idx]

  original_model_outputs = original_model.generate(input_ids=input_ids, generation_config=GenerationConfig(max_new_tokens=200, num_beams=1))
  original_model_text_output = tokenizer.decode(original_model_outputs[0], skip_special_tokens=True)
  original_model_summaries.append(original_model_text_output)

  instruct_model_outputs = original_model.generate(input_ids=input_ids, generation_config=GenerationConfig(max_new_tokens=200, num_beams=1))
  instruct_model_text_output = tokenizer.decode(instruct_model_outputs[0], skip_special_tokens=True)
  instruct_model_summaries.append(instruct_model_text_output)

  peft_model_outputs = peft_model.generate(input_ids=input_ids, generation_config=GenerationConfig(max_new_tokens=200, num_beams=1))
  peft_model_text_output = tokenizer.decode(peft_model_outputs[0], skip_special_tokens=True)
  peft_model_summaries.append(peft_model_text_output)

zipped_summaries = list(zip(human_baseline_summaries, original_model_summaries, instruct_model_summaries, peft_model_summaries))

df = pd.DataFrame(zipped_summaries, columns=['human_baseline_summaries', 'original_model_summaries', 'instruct_model_summaries', 'peft_model_summaries'])
df

Unnamed: 0,human_baseline_summaries,original_model_summaries,instruct_model_summaries,peft_model_summaries
0,Ms. Dawson helps #Person1# to write a memo to ...,New intra-office memorandum goes out today.,#Person1#: This is an intra-office memo. #Pers...,This memo is to be distributed to all employee...
1,In order to prevent employees from wasting tim...,"#Person1#: Ms. Dawson, I need you to take a di...",The following are the following rules for comm...,This memo is to be distributed to all employee...
2,Ms. Dawson takes a dictation for #Person1# abo...,Memo to all employees.,This memo is to be distributed to all employee...,This memo is to be distributed to all employee...
3,#Person2# arrives late because of traffic jam....,#Person1#: You're finally here! #Person2#: You...,#Person1: I'm so excited for you. I'm so glad ...,The traffic jam at the Carrefour intersection ...
4,#Person2# decides to follow #Person1#'s sugges...,The driver is stuck in traffic.,The car is a waste of time and money.,The traffic jam at the Carrefour intersection ...
5,#Person2# complains to #Person1# about the tra...,#Person1#: I'm not sure what happened to me.,The driver's car is a problem for the driver.,The traffic jam at the Carrefour intersection ...
6,#Person1# tells Kate that Masha and Hero get d...,Masha and Hero are divorced.,#Person1: Masha and Hero are getting divorced....,Masha and Hero are getting divorced.
7,#Person1# tells Kate that Masha and Hero are g...,Masha and Hero are having a separation for 2 m...,Masha and Hero are getting divorced.,Masha and Hero are getting divorced.
8,#Person1# and Kate talk about the divorce betw...,#Person1: Masha and Hero are getting divorced.,"#Person1: Kate, you never believe what's happe...",Masha and Hero are getting divorced.
9,#Person1# and Brian are at the birthday party ...,Brian is a guest at the party.,"Happy Birthday, Brian.",Brian's birthday is coming up.
