# Fine Tuning Large Language Model - Model

In this workshop, you will learn how to fine-tune prompts and LLMs to enhance and improve their responses.

In [None]:
!pip install evaluate



In [1]:
# Import libraries
import torch, time
import pandas as pd
import numpy as np
import evaluate
from datasets import load_dataset
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, Trainer, GenerationConfig, TrainingArguments

from peft import PeftModel, LoraConfig, get_peft_model, TaskType

In [2]:
# TODO: Load and explore the following datasets

# Hub identifiers: the dataset to fine-tune on and the checkpoint the model/tokenizer cells load.
dataset_name = "knkarthick/dialogsum"
model_name = "google/flan-t5-small"

# Later cells index the result by split name ('train', 'validation', 'test').
dataset = load_dataset(dataset_name)


In [3]:
# TODO: Print a record
# Show the shape of every split before looking at a single row.
print(dataset.shape)

# Index of the sample record; later cells reuse it to inspect the same row.
idx = 10

# Dump each field of the selected training record as a key/value pair.
record = dataset['train'][idx]
for field in record:
  print(f'k: {field}\nv: {record[field]}')


{'train': (12460, 4), 'validation': (500, 4), 'test': (1500, 4)}
k: id
v: train_10
k: dialogue
v: #Person1#: Could you do me a favor?
#Person2#: Sure. What is it?
#Person1#: Could you run over to the store? We need a few things.
#Person2#: All right. What do you want me to get?
#Person1#: Well, could you pick up some sugar?
#Person2#: Okay. How much?
#Person1#: A small bag. I guess we also need a few oranges.
#Person2#: How many?
#Person1#: Oh, let's see. . . About six.
#Person2#: Anything else?
#Person1#: Yes. We're out of milk.
#Person2#: Okay. How much do you want me to get? A gallon?
#Person1#: No. I think a half gallon will be enough.
#Person2#: Is that all?
#Person1#: I think so. Have you got all that?
#Person2#: Yes. That's small bag of sugar, four oranges, and a half gallon of milk.
#Person1#: Do you have enough money?
#Person2#: I think so.
#Person1#: Thanks very much. I appreciate it.
k: summary
v: #Person1# asks #Person2# to do a favor. #Person2# agrees and helps buy a small

## Fine tuning the LLM model

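In this workshop we will be tuning a FLAN-T5 model: the training cells below load <code>google/flan-t5-small</code>, while the pre-trained LoRA adapter used later is loaded on top of <code>google/flan-t5-base</code>.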

In [4]:
# Utility function to dump a model's tunable parameters

def print_trainable_model_params(model):
   """Summarize how many of a model's parameters are trainable.

   Despite the name, nothing is printed: the summary is returned as a string
   so the caller decides how to display it.

   Args:
      model: Any object exposing ``named_parameters()`` that yields
         ``(name, parameter)`` pairs, where each parameter provides
         ``numel()`` and a ``requires_grad`` flag.

   Returns:
      A three-line string with the trainable parameter count, the total
      parameter count and the trainable share as a percentage (two decimals).
      A model without any parameters is reported as 0.00% trainable.
   """
   trainable_model_params = 0
   all_model_params = 0
   for _, param in model.named_parameters():
      count = param.numel()
      all_model_params += count
      if param.requires_grad:
         trainable_model_params += count
   # Guard the division: a parameter-less model would otherwise raise ZeroDivisionError.
   percentage = 100 * trainable_model_params / all_model_params if all_model_params else 0.0
   return f"Trainable parameters: {trainable_model_params}\nTotal parameters: {all_model_params}\nPercentable of trainable parameters: {percentage:.2f}%"

In [None]:
# TODO: Load model
# Load the seq2seq checkpoint named by model_name; the cells below use this
# instance to count parameters and to inspect the architecture.
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

In [None]:
# TODO: Print number of trainable parameters
# The helper returns the summary as a string, so it has to be printed explicitly.
print(print_trainable_model_params(model))

Trainable parameters: 76961152
Total parameters: 76961152
Percentable of trainable parameters: 100.00%


### Preprocess the dialogue dataset

We will train the model to summarize dialogue by creating a dialogue-summary pair for the LLM to process. The dialogue is the training data and the summary is the label. This is supervised learning.

The prompt will be as follows

```
Summarize the following dialogue.\n
\n
Fred: ...\n
Barney: ...\n
\n
Summary:\n
Summary of the conversation between Fred and Barney
```

The prompt and the summary will be tokenized for the LLM

In [None]:
# Load the tokenizer that belongs to the checkpoint named by model_name;
# tokenize_fn below reads this module-level `tokenizer`.
tokenizer = AutoTokenizer.from_pretrained(model_name)

tokenizer_config.json: 0.00B [00:00, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json: 0.00B [00:00, ?B/s]

In [None]:
# Utility function to prepare the data for training
# Tokenize function
def tokenize_fn(data, label_pad_token_id=None):
   """Tokenize a batch of dialogue/summary pairs for seq2seq fine-tuning.

   Each dialogue is wrapped in the instruction prompt
   ``Summarize the following dialogue.\\n\\n<dialogue>\\n\\nSummary:`` and
   tokenized into ``input_ids``; each summary is tokenized into ``labels``.
   Both are padded with ``padding="max_length"`` and truncated.

   Uses the module-level ``tokenizer``.

   Args:
      data: A batch with ``'dialogue'`` and ``'summary'`` entries, each an
         iterable of strings (as passed by ``dataset.map(..., batched=True)``).
      label_pad_token_id: Optional replacement id for padding positions in
         ``labels``. Defaults to ``None``, which leaves the pad token ids in
         place exactly as before, so the labels stay decodable with
         ``tokenizer.decode``. Pass ``-100`` (e.g. via
         ``fn_kwargs={'label_pad_token_id': -100}``) so the loss ignores
         padding during training.

   Returns:
      The same ``data`` object with ``'input_ids'`` and ``'labels'`` added.
   """
   start_prompt = 'Summarize the following dialogue.\n\n'
   end_prompt = '\n\nSummary:'
   prompts = [ f'{start_prompt}{dialogue}{end_prompt}' for dialogue in data['dialogue'] ]

   input_ids = tokenizer(prompts, padding="max_length", truncation=True, return_tensors="pt").input_ids
   labels = tokenizer(data['summary'], padding="max_length", truncation=True, return_tensors="pt").input_ids

   # With the default (None) the padded labels are left untouched; otherwise
   # padding positions are replaced so they can be excluded from the loss.
   if label_pad_token_id is not None:
      labels = labels.masked_fill(labels == tokenizer.pad_token_id, label_pad_token_id)

   data['input_ids'] = input_ids
   data['labels'] = labels
   return data


In [None]:
# TODO: prepare the data for training with the tokenize_fn function
# batched=True hands tokenize_fn whole batches (lists of dialogues/summaries) rather than single rows.
tokenized_dataset = dataset.map(tokenize_fn, batched=True)



Map:   0%|          | 0/12460 [00:00<?, ? examples/s]

Map:   0%|          | 0/500 [00:00<?, ? examples/s]

Map:   0%|          | 0/1500 [00:00<?, ? examples/s]

In [None]:
# TODO: Verify prepared data
# Print every column of the sample record, now including input_ids and labels.
tokenized_record = tokenized_dataset['train'][idx]
for column in tokenized_record:
  print(f'k: {column}\nv: {tokenized_record[column]}\n')


k: id
v: train_10

k: dialogue
v: #Person1#: Could you do me a favor?
#Person2#: Sure. What is it?
#Person1#: Could you run over to the store? We need a few things.
#Person2#: All right. What do you want me to get?
#Person1#: Well, could you pick up some sugar?
#Person2#: Okay. How much?
#Person1#: A small bag. I guess we also need a few oranges.
#Person2#: How many?
#Person1#: Oh, let's see. . . About six.
#Person2#: Anything else?
#Person1#: Yes. We're out of milk.
#Person2#: Okay. How much do you want me to get? A gallon?
#Person1#: No. I think a half gallon will be enough.
#Person2#: Is that all?
#Person1#: I think so. Have you got all that?
#Person2#: Yes. That's small bag of sugar, four oranges, and a half gallon of milk.
#Person1#: Do you have enough money?
#Person2#: I think so.
#Person1#: Thanks very much. I appreciate it.

k: summary
v: #Person1# asks #Person2# to do a favor. #Person2# agrees and helps buy a small bag of sugar, six oranges, and a half-gallon of milk.

k: topi

In [None]:
# Round-trip check: decode the token ids of the sample record back to text,
# first the prompt, then the label, separated by a dashed line.
sample = tokenized_dataset['train'][idx]
decoded_prompt = tokenizer.decode(sample['input_ids'])
print(decoded_prompt)
print('---------------')
decoded_label = tokenizer.decode(sample['labels'])
print(decoded_label)

Summarize the following dialogue. #Person1#: Could you do me a favor? #Person2#: Sure. What is it? #Person1#: Could you run over to the store? We need a few things. #Person2#: All right. What do you want me to get? #Person1#: Well, could you pick up some sugar? #Person2#: Okay. How much? #Person1#: A small bag. I guess we also need a few oranges. #Person2#: How many? #Person1#: Oh, let's see. . . About six. #Person2#: Anything else? #Person1#: Yes. We're out of milk. #Person2#: Okay. How much do you want me to get? A gallon? #Person1#: No. I think a half gallon will be enough. #Person2#: Is that all? #Person1#: I think so. Have you got all that? #Person2#: Yes. That's small bag of sugar, four oranges, and a half gallon of milk. #Person1#: Do you have enough money? #Person2#: I think so. #Person1#: Thanks very much. I appreciate it. Summary:</s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad

In [None]:
# TODO: Remove id, dialogue, summary and topic columns from dataset. We only want input_ids and labels
# Drop only the columns that are still present, so re-running this cell does not fail on
# columns an earlier run already removed.
# NOTE(review): assumes every split has the same columns as 'train' — confirm.
columns_to_drop = ['id', 'dialogue', 'summary', 'topic']
present_columns = [c for c in columns_to_drop if c in tokenized_dataset['train'].column_names]
if present_columns:
   tokenized_dataset = tokenized_dataset.remove_columns(present_columns)


In [None]:
# TODO: Verify dataset again
# Prints the full record, i.e. the complete padded id lists for the sample row.
print(tokenized_dataset['train'][idx])

{'input_ids': [12198, 1635, 1737, 8, 826, 7478, 5, 1713, 345, 13515, 536, 4663, 10, 9348, 25, 103, 140, 3, 9, 4971, 58, 1713, 345, 13515, 357, 4663, 10, 10625, 5, 363, 19, 34, 58, 1713, 345, 13515, 536, 4663, 10, 9348, 25, 661, 147, 12, 8, 1078, 58, 101, 174, 3, 9, 360, 378, 5, 1713, 345, 13515, 357, 4663, 10, 432, 269, 5, 363, 103, 25, 241, 140, 12, 129, 58, 1713, 345, 13515, 536, 4663, 10, 1548, 6, 228, 25, 1432, 95, 128, 2656, 58, 1713, 345, 13515, 357, 4663, 10, 16036, 5, 571, 231, 58, 1713, 345, 13515, 536, 4663, 10, 71, 422, 2182, 5, 27, 3382, 62, 92, 174, 3, 9, 360, 5470, 7, 5, 1713, 345, 13515, 357, 4663, 10, 571, 186, 58, 1713, 345, 13515, 536, 4663, 10, 3359, 6, 752, 31, 7, 217, 5, 3, 5, 3, 5, 4504, 1296, 5, 1713, 345, 13515, 357, 4663, 10, 21345, 1307, 58, 1713, 345, 13515, 536, 4663, 10, 2163, 5, 101, 31, 60, 91, 13, 3702, 5, 1713, 345, 13515, 357, 4663, 10, 16036, 5, 571, 231, 103, 25, 241, 140, 12, 129, 58, 71, 12486, 106, 58, 1713, 345, 13515, 536, 4663, 10, 465, 5, 27, 

### Tune model with pre-processed dataset

We will use [<code>Trainer</code>](https://huggingface.co/docs/transformers/main/en/main_classes/trainer#transformers.Trainer) to train the original model. The training result will be written out. The trainer will be configured with [<code>TrainingArguments</code>](https://huggingface.co/docs/transformers/main/en/main_classes/trainer#transformers.TrainingArguments).

In [None]:
# CUDA information
# Report whether a CUDA device is usable and, if so, select device 0 and describe it.

cuda_available = torch.cuda.is_available()
print('CUDA available: ', cuda_available)
if cuda_available:
   # The check is for bfloat16, so the label reads BF16 (it was misspelled 'B16').
   print('BF16 supported: ', torch.cuda.is_bf16_supported())
   torch.cuda.set_device(0)
   print('Current device: ', torch.cuda.current_device())
   print('CUDA device name: ', torch.cuda.get_device_name(0))

CUDA available:  False


## Fine tuning the LLM Model with Low-Rank Adaptation (LoRA) / Parameter Efficient Fine Tuning (PEFT)

We will add a LoRA adapter to the LLM and train only the adapter; the original LLM will be frozen. (The training cells below use `flan-t5-small`; the saved adapter loaded later is applied to `flan-t5-base`.) The adapter can be combined with the original LLM at inference time.

In [None]:
# Print the module tree; the attention layer names shown here (q, k, v, o) are
# what LoraConfig's target_modules refers to in the next cell.
print(model)

T5ForConditionalGeneration(
  (shared): Embedding(32128, 512)
  (encoder): T5Stack(
    (embed_tokens): Embedding(32128, 512)
    (block): ModuleList(
      (0): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=512, out_features=384, bias=False)
              (k): Linear(in_features=512, out_features=384, bias=False)
              (v): Linear(in_features=512, out_features=384, bias=False)
              (o): Linear(in_features=384, out_features=512, bias=False)
              (relative_attention_bias): Embedding(32, 6)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerFF(
            (DenseReluDense): T5DenseGatedActDense(
              (wi_0): Linear(in_features=512, out_features=1024, bias=False)
              (wi_1): Linear(in_features=512, out_features=1024, bias=False)
              (wo): 

In [None]:
# TODO: Configure LoRA
# Adapter settings: which task and layers to adapt first, then the LoRA hyper-parameters.
lora_config = LoraConfig(
    task_type=TaskType.SEQ_2_SEQ_LM,
    target_modules=["q", "v"],  # the q and v Linear layers inside each attention block
    r=32,
    lora_alpha=64,
    lora_dropout=0.05,
    bias="none",
)


In [None]:
# TODO: Add LoRA to the LLM model to be trained

# Load a fresh copy of the checkpoint to wrap with the adapter
# (separate from `model`, which was loaded earlier without an adapter).
base_model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

# Wrap the base model with LoRA adapters as described by lora_config.
lora_model = get_peft_model(base_model, lora_config)

In [None]:
# TODO: Print number of parameters, compare LoRA to the original model
# Same helper as used for the un-adapted model, so the two summaries are directly comparable.
print(print_trainable_model_params(lora_model))


Trainable parameters: 1376256
Total parameters: 78337408
Percentable of trainable parameters: 1.76%


In [None]:
# TODO: Train model with LoRA
# The output directory name ends with the current Unix time in whole seconds.
run_stamp = int(time.time())
output_dir = f'./peft-dialogue-summary-{run_stamp}'

lora_training_args = TrainingArguments(
    num_train_epochs=1,
    learning_rate=1e-3, # Higher learning rate than full fine-tuning.
    auto_find_batch_size=True,
    output_dir=output_dir,
)

In [None]:
# TODO: Create trainer and train model
# Train on the tokenized 'train' split and evaluate on the 'validation' split.
lora_trainer = Trainer(
    model=lora_model,
    args=lora_training_args,
    train_dataset=tokenized_dataset['train'],
    eval_dataset=tokenized_dataset['validation']
)

# The calls below are intentionally left disabled in this workshop;
# a previously saved adapter is loaded later instead of training here.
# Start the training
#lora_trainer.train()

# Save the model
#lora_trainer.save_model()
# Save the tokenizer
#tokenizer.save_pretrained(output_dir)


### Use a trained LoRA model

Training takes a few hours and runs over many iterations.

For the purpose of this workshop we use a saved model, [intotheverse/peft-dialogue-summary-checkpoint](https://huggingface.co/intotheverse/peft-dialogue-summary-checkpoint).

In [6]:
#TODO: Load the original model and add the pre-trained LoRA adaptation to the model
# NOTE(review): this cell rebinds model_name, tokenizer, base_model and lora_model, which
# earlier cells set up for "google/flan-t5-small"; from here on they refer to flan-t5-base.
peft_dialogue_summary_checkpoint = 'intotheverse/peft-dialogue-summary-checkpoint'
model_name = "google/flan-t5-base"

# Tokenizer for the flan-t5-base checkpoint; used for both models during evaluation.
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Un-adapted model, kept as the baseline to compare against.
original_model = AutoModelForSeq2SeqLM.from_pretrained(model_name, dtype=torch.bfloat16)

# Second, independent copy of the same checkpoint that will carry the adapter.
base_model = AutoModelForSeq2SeqLM.from_pretrained(model_name, dtype=torch.bfloat16)

# Attach the saved LoRA adapter to the base model; is_trainable=False loads it for inference only.
# NOTE(review): confirm that PeftModel.from_pretrained honours the dtype keyword.
lora_model = PeftModel.from_pretrained(base_model, peft_dialogue_summary_checkpoint
    , dtype=torch.bfloat16, is_trainable=False)


In [7]:
# Parameter summary for the adapter-loaded model (loaded with is_trainable=False above).
print(print_trainable_model_params(lora_model))

Trainable parameters: 0
Total parameters: 251116800
Percentable of trainable parameters: 0.00%


## Evaluate LoRA model

In [None]:
# TODO: Evaluate LoRA model against the original



In [8]:
# Prepare data for evaluation
# Generate a summary for the first few test dialogues with both the original and the
# LoRA-adapted model, and collect them next to the human-written reference summary.
NUM_EVAL_SAMPLES = 5  # number of test records summarized by each model

dialogues = []
summaries = []
orig_model_summaries = []
lora_model_summaries = []
config = GenerationConfig(max_new_tokens=200)

for i in range(NUM_EVAL_SAMPLES):
   print(f'i = {i}')
   record = dataset['test'][i]  # fetch the row once instead of once per field
   d = record['dialogue']
   s = record['summary']
   # NOTE(review): this prompt says "conversation" while tokenize_fn uses "dialogue" —
   # confirm which wording the loaded adapter was trained with.
   prompt = f"Summarize the following conversation.\n\n{d}\n\nSummary:"
   tokenized_prompt = tokenizer(prompt, return_tensors='pt').input_ids

   # Same prompt and generation settings for both models so the outputs are comparable.
   orig_resp = original_model.generate(input_ids=tokenized_prompt, generation_config=config)
   orig_resp_text = tokenizer.decode(orig_resp[0], skip_special_tokens=True)
   lora_resp = lora_model.generate(input_ids=tokenized_prompt, generation_config=config)
   lora_resp_text = tokenizer.decode(lora_resp[0], skip_special_tokens=True)

   dialogues.append(d)  # this list was declared but never filled before
   summaries.append(s)
   orig_model_summaries.append(orig_resp_text)
   lora_model_summaries.append(lora_resp_text)

zipped_summaries = list(zip(summaries, orig_model_summaries, lora_model_summaries))
df = pd.DataFrame(zipped_summaries, columns=['label', 'original_model_summary', 'lora_model_summary'])
df

i = 0
i = 1
i = 2
i = 3
i = 4


Unnamed: 0,label,original_model_summary,lora_model_summary
0,Ms. Dawson helps #Person1# to write a memo to ...,The memo is to be distributed to all employees...,#Person1# asks Ms. Dawson to take a dictation ...
1,In order to prevent employees from wasting tim...,The memo is to be distributed to all employees...,#Person1# asks Ms. Dawson to take a dictation ...
2,Ms. Dawson takes a dictation for #Person1# abo...,The memo is to be distributed to all employees...,#Person1# asks Ms. Dawson to take a dictation ...
3,#Person2# arrives late because of traffic jam....,The traffic jam at the Carrefour intersection ...,#Person2# got stuck in traffic and #Person1# s...
4,#Person2# decides to follow #Person1#'s sugges...,The traffic jam at the Carrefour intersection ...,#Person2# got stuck in traffic and #Person1# s...


### Evaluate models with ROUGE/Bleu metrics

Recall-Oriented Understudy for Gisting Evaluation ([ROUGE](https://pub.aimind.so/unveiling-the-power-of-rouge-metrics-in-nlp-b6d3f96d3363)) is a set of metrics used to evaluate the quality of machine-generated text, such as summaries and translations. ROUGE metrics compare the generated text to a human-written reference and measure the overlap between the two.

The metrics range between 0 and 1, with higher scores indicating higher similarity between the baseline and generated text.

In [9]:
# TODO: create ROUGE metrics
# Load the ROUGE metric through the evaluate library; it is used to score both models below.
rouge = evaluate.load('rouge')

In [10]:
# TODO: Evaluate the original model's result
# Both models are scored against the same references with identical ROUGE settings.
rouge_settings = dict(references=summaries, use_aggregator=True, use_stemmer=True)

orig_model_score = rouge.compute(predictions=orig_model_summaries, **rouge_settings)
lora_model_score = rouge.compute(predictions=lora_model_summaries, **rouge_settings)

# Closer to 1 is better
print(orig_model_score, '\n-------------------------\n', lora_model_score, sep='\n')

{'rouge1': np.float64(0.17391941391941393), 'rouge2': np.float64(0.03820816864295125), 'rougeL': np.float64(0.13364468864468865), 'rougeLsum': np.float64(0.13364468864468865)}

-------------------------

{'rouge1': np.float64(0.34193513803269904), 'rouge2': np.float64(0.10165150704476547), 'rougeL': np.float64(0.2728511771470072), 'rougeLsum': np.float64(0.27097500453912726)}


In [11]:
# TODO: Evaluate with Bleu metrics
# Load the BLEU metric through the evaluate library, as a second score next to ROUGE.
bleu = evaluate.load('bleu')


Downloading builder script: 0.00B [00:00, ?B/s]

Downloading extra modules:   0%|          | 0.00/1.55k [00:00<?, ?B/s]

Downloading extra modules: 0.00B [00:00, ?B/s]

In [15]:
# Score the original model first, then the LoRA model, against the same reference summaries.
orig_model_bleu_score, lora_model_bleu_score = (
    bleu.compute(predictions=model_summaries, references=summaries)
    for model_summaries in (orig_model_summaries, lora_model_summaries)
)

# Close to 1.0 is better
divider = '\n-------------------------\n'
print(orig_model_bleu_score)
print(divider)
print(lora_model_bleu_score)

{'bleu': 0.0156413976950709, 'precisions': [0.2835820895522388, 0.08064516129032258, 0.03508771929824561, 0.019230769230769232], 'brevity_penalty': 0.24955905423103703, 'length_ratio': 0.41875, 'translation_length': 67, 'reference_length': 160}

-------------------------

{'bleu': 0.0819270536975234, 'precisions': [0.2747603833865815, 0.12337662337662338, 0.066006600660066, 0.020134228187919462], 'brevity_penalty': 1.0, 'length_ratio': 1.95625, 'translation_length': 313, 'reference_length': 160}
