In [1]:
!pip install torch torchdata --quiet

!pip install transformers datasets evaluate rouge_score loralib peft --quiet

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m521.2/521.2 kB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m10.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m168.3/168.3 kB[0m [31m17.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m115.3/115.3 kB[0m [31m13.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.8/134.8 kB[0m [31m15.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m265.7/265.7 kB[0m [31m19.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Building wheel for rouge_score (setup.py) ... [?25l[?25hdone


In [2]:
from datasets import load_dataset
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, GenerationConfig, TrainingArguments, Trainer
import torch
import time
import evaluate
import pandas as pd
import numpy as np

In [3]:
huggingface_dataset_name = "knkarthick/dialogsum"

dataset = load_dataset(huggingface_dataset_name)

dataset

Downloading readme:   0%|          | 0.00/4.65k [00:00<?, ?B/s]

Downloading data files:   0%|          | 0/3 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/11.3M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/442k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/1.35M [00:00<?, ?B/s]

Extracting data files:   0%|          | 0/3 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

Generating validation split: 0 examples [00:00, ? examples/s]

Generating test split: 0 examples [00:00, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['id', 'dialogue', 'summary', 'topic'],
        num_rows: 12460
    })
    validation: Dataset({
        features: ['id', 'dialogue', 'summary', 'topic'],
        num_rows: 500
    })
    test: Dataset({
        features: ['id', 'dialogue', 'summary', 'topic'],
        num_rows: 1500
    })
})

In [4]:
model_name ='t5-small' #'/kaggle/input/flan-t5/pytorch/base/4'
original_model = AutoModelForSeq2SeqLM.from_pretrained(model_name, torch_dtype=torch.bfloat16)
tokenizer = AutoTokenizer.from_pretrained(model_name)

config.json:   0%|          | 0.00/1.21k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/242M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/2.32k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.39M [00:00<?, ?B/s]

In [5]:
from peft import IA3Config, get_peft_model, TaskType
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

In [6]:
ia3_config = IA3Config(
    peft_type="IA3",
    task_type="SEQ_2_SEQ_LM",
    target_modules=["k", "v", "w0"],
    feedforward_modules=["w0"],
)

In [7]:
def print_number_of_trainable_model_parameters(model):
    trainable_model_params = 0
    all_model_params = 0
    for _, param in model.named_parameters():
        all_model_params += param.numel()
        if param.requires_grad:
            trainable_model_params += param.numel()
    return f"trainable model parameters: {trainable_model_params}\nall model parameters: {all_model_params}\npercentage of trainable model parameters: {100 * trainable_model_params / all_model_params:.2f}%"

In [8]:
peft_model = get_peft_model(original_model,
                            ia3_config)
print(print_number_of_trainable_model_parameters(peft_model))

trainable model parameters: 18432
all model parameters: 60525056
percentage of trainable model parameters: 0.03%


In [None]:
def tokenize_function(example):
    start_prompt = 'Summarize the following conversation.\n\n'
    end_prompt = '\n\nSummary: '
    prompt = [start_prompt + dialogue + end_prompt for dialogue in example["dialogue"]]
    example['input_ids'] = tokenizer(prompt, padding="max_length", truncation=True, return_tensors="pt").input_ids
    example['labels'] = tokenizer(example["summary"], padding="max_length", truncation=True, return_tensors="pt").input_ids

    return example

# The dataset actually contains 3 diff splits: train, validation, test.
# The tokenize_function code is handling all data across all splits in batches.
tokenized_datasets = dataset.map(tokenize_function, batched=True)
tokenized_datasets = tokenized_datasets.remove_columns(['id', 'topic', 'dialogue', 'summary',])

Map:   0%|          | 0/12460 [00:00<?, ? examples/s]

Map:   0%|          | 0/500 [00:00<?, ? examples/s]

Map:   0%|          | 0/1500 [00:00<?, ? examples/s]

In [None]:
tokenized_datasets = tokenized_datasets.filter(lambda example, index: index % 100 == 0, with_indices=True)

In [None]:
print(f"Shapes of the datasets:")
print(f"Training: {tokenized_datasets['train'].shape}")
print(f"Validation: {tokenized_datasets['validation'].shape}")
print(f"Test: {tokenized_datasets['test'].shape}")

print(tokenized_datasets)

In [12]:
output_dir = f'/content/kaggle/working/peft-dialogue-summary-training-{str(int(time.time()))}'

peft_training_args = TrainingArguments(
    output_dir=output_dir,
    auto_find_batch_size=True,
    learning_rate=1e-3, # Higher learning rate than full fine-tuning.
    num_train_epochs=2,
    logging_steps=1,
    max_steps=1
)

peft_trainer = Trainer(
    model=peft_model,
    args=peft_training_args,
    train_dataset=tokenized_datasets["train"],
)

In [13]:
peft_trainer.train()

peft_model_path="/content/kaggle/working/peft-dialogue-summary-checkpoint-local"

peft_trainer.model.save_pretrained(peft_model_path)
tokenizer.save_pretrained(peft_model_path)

Step,Training Loss
1,2.1094


('/content/kaggle/working/peft-dialogue-summary-checkpoint-local/tokenizer_config.json',
 '/content/kaggle/working/peft-dialogue-summary-checkpoint-local/special_tokens_map.json',
 '/content/kaggle/working/peft-dialogue-summary-checkpoint-local/tokenizer.json')

In [14]:
peft_model = peft_model.to('cpu')
original_model = original_model.to('cpu')

In [15]:
index = 200
dialogue = dataset['test'][index]['dialogue']
baseline_human_summary = dataset['test'][index]['summary']

prompt = f"""
Summarize the following conversation.

{dialogue}

Summary: """

input_ids = tokenizer(prompt, return_tensors="pt").input_ids

original_model_outputs = original_model.generate(input_ids=input_ids, generation_config=GenerationConfig(max_new_tokens=200, num_beams=1))
original_model_text_output = tokenizer.decode(original_model_outputs[0], skip_special_tokens=True)

peft_model_outputs = peft_model.generate(input_ids=input_ids, generation_config=GenerationConfig(max_new_tokens=200, num_beams=1))
peft_model_text_output = tokenizer.decode(peft_model_outputs[0], skip_special_tokens=True)

print("-----------------")
print(f'BASELINE HUMAN SUMMARY:\n{baseline_human_summary}')
print("-----------------")
print(f'ORIGINAL MODEL:\n{original_model_text_output}')
print("-----------------")
print(f'PEFT MODEL: {peft_model_text_output}')

-----------------
BASELINE HUMAN SUMMARY:
#Person1# teaches #Person2# how to upgrade software and hardware in #Person2#'s system.
-----------------
ORIGINAL MODEL:
#Person2#: You'd like to upgrade your hardware, but you'd probably want to upgrade your hardware. #Person2#: You might want to add a painting program to your software. #Person2##: Yes, but I'm not sure what exactly exactly I'd need.
-----------------
PEFT MODEL: :: a faster processor, more memory and a faster modem. #Person2#: Yes, but I'm not sure what exactly I would need. #Person2#: Yes, but I'm not sure what exactly I would need. #Person2#: Yes, but I'm not sure what exactly I would need. #Person2#: No.


In [20]:
dialogues = dataset['test'][0:10]['dialogue']
human_baseline_summaries = dataset['test'][0:10]['summary']

original_model_summaries = []
instruct_model_summaries = []
peft_model_summaries = []

for idx, dialogue in enumerate(dialogues):
    prompt = f"""
Summarize the following conversation.

{dialogue}

Summary: """

    input_ids = tokenizer(prompt, return_tensors="pt").input_ids

    human_baseline_text_output = human_baseline_summaries[idx]

    original_model_outputs = original_model.generate(input_ids=input_ids, generation_config=GenerationConfig(max_new_tokens=200))
    original_model_text_output = tokenizer.decode(original_model_outputs[0], skip_special_tokens=True)

    peft_model_outputs = peft_model.generate(input_ids=input_ids, generation_config=GenerationConfig(max_new_tokens=200))
    peft_model_text_output = tokenizer.decode(peft_model_outputs[0], skip_special_tokens=True)

    original_model_summaries.append(original_model_text_output)

    peft_model_summaries.append(peft_model_text_output)

zipped_summaries = list(zip(human_baseline_summaries, original_model_summaries, peft_model_summaries))

df = pd.DataFrame(zipped_summaries, columns = ['human_baseline_summaries', 'original_model_summaries', 'peft_model_summaries'])
df

Unnamed: 0,human_baseline_summaries,original_model_summaries,peft_model_summaries
0,Ms. Dawson helps #Person1# to write a memo to ...,"#Person2#: Yes, sir. Go ahead with the memo. #...",:: Yes. Yes.. Do not let employees use Instant...
1,In order to prevent employees from wasting tim...,": Yes, sir. Go ahead. #Person2#: Yes, sir. Go ...","#Person1#: Ms. Dawson, I need you to take a di..."
2,Ms. Dawson takes a dictation for #Person1# abo...,": Yes, sir.: Yes, sir. Go ahead.: Yes, sir. Go...",: This memo should be typed up and distributed...
3,#Person2# arrives late because of traffic jam....,: I'm going to quit driving to work. #Person2#...,: I'm going to quit driving to work. #Person2#...
4,#Person2# decides to follow #Person1#'s sugges...,": Yes, it's not good for me or for the environ...",: I'm going to quit driving to work.Person1#: ...
5,#Person2# complains to #Person1# about the tra...,: I'm going to miss having the freedom that yo...,: You're here!: You're going to quit driving t...
6,#Person1# tells Kate that Masha and Hero get d...,"::: What's happened?: Masha and Hero, the perf...",: Masha and Hero are getting divorced.: What's...
7,#Person1# tells Kate that Masha and Hero are g...,":: Kate, you never believe what's happened. #P...",: What happened? #Person2#: Masha and Hero are...
8,#Person1# and Kate talk about the divorce betw...,": What happened? #Person2#: Masha, you never b...",...? #Person1#: What do you mean? #Person2#: W...
9,#Person1# and Brian are at the birthday party ...,": Happy Birthday, Brian.:, Brian. Brian:::: Th...",":: Happy Birthday, Brian.:Person1#: Happy Birt..."


In [21]:
rouge = evaluate.load('rouge')

original_model_results = rouge.compute(
    predictions=original_model_summaries,
    references=human_baseline_summaries[0:len(original_model_summaries)],
    use_aggregator=True,
    use_stemmer=True,
)


peft_model_results = rouge.compute(
    predictions=peft_model_summaries,
    references=human_baseline_summaries[0:len(peft_model_summaries)],
    use_aggregator=True,
    use_stemmer=True,
)

print('ORIGINAL MODEL:')
print(original_model_results)
print('PEFT MODEL:')
print(peft_model_results)

ORIGINAL MODEL:
{'rouge1': 0.16713172280200256, 'rouge2': 0.03060803048184188, 'rougeL': 0.1264361669057275, 'rougeLsum': 0.12818161474255113}
PEFT MODEL:
{'rouge1': 0.23794232813504415, 'rouge2': 0.0643904591083655, 'rougeL': 0.18379555347355786, 'rougeLsum': 0.18370112185775603}


In [22]:
bleu = evaluate.load("bleu")

original_model_results = bleu.compute(
    predictions=original_model_summaries,
    references=human_baseline_summaries[0:len(original_model_summaries)],
    #use_aggregator=True,  # Aggregate scores across all n-grams
    smooth="epsilon"  # Apply smoothing to avoid zero scores (optional)
)

peft_model_results = bleu.compute(
    predictions=peft_model_summaries,
    references=human_baseline_summaries[0:len(peft_model_summaries)],
    #use_aggregator=True,
    smooth="epsilon"
)

print('ORIGINAL MODEL:')
print(original_model_results)
print('PEFT MODEL:')
print(peft_model_results)

ORIGINAL MODEL:
{'bleu': 0.022326593464299244, 'precisions': [0.14668769716088328, 0.04487179487179487, 0.02280130293159609, 0.0016556291390728477], 'brevity_penalty': 1.0, 'length_ratio': 2.2607142857142857, 'translation_length': 633, 'reference_length': 280}
PEFT MODEL:
{'bleu': 0.03672286528949113, 'precisions': [0.1657142857142857, 0.07246376811594203, 0.033823529411764704, 0.004477611940298508], 'brevity_penalty': 1.0, 'length_ratio': 2.4964285714285714, 'translation_length': 699, 'reference_length': 280}
