In [3]:
%pip install --upgrade pip
%pip install --disable-pip-version-check \
    torch==1.13.1 \
    torchdata==0.5.1 --quiet

%pip install \
    transformers==4.27.2 \
    datasets==2.11.0 \
    evaluate==0.4.0 \
    rouge_score==0.1.2 \
    loralib==0.1.1 \
    peft==0.3.0 --quiet

Collecting pip
  Downloading pip-23.3.1-py3-none-any.whl (2.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m7.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pip
  Attempting uninstall: pip
    Found existing installation: pip 23.1.2
    Uninstalling pip-23.1.2:
      Successfully uninstalled pip-23.1.2
Successfully installed pip-23.3.1
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m887.5/887.5 MB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.6/4.6 MB[0m [31m43.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m849.3/849.3 kB[0m [31m30.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m557.1/557.1 MB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m317.1/317.1 MB[0m [31m4.3 MB/s[0m eta [36m0:

In [4]:
from datasets import load_dataset
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, GenerationConfig, TrainingArguments, Trainer
import torch
import time
import evaluate
import pandas as pd
import numpy as np

In [5]:
huggingface_dataset_name = "knkarthick/dialogsum"

dataset = load_dataset(huggingface_dataset_name)

dataset

Downloading readme:   0%|          | 0.00/4.65k [00:00<?, ?B/s]

Downloading and preparing dataset csv/knkarthick--dialogsum to /root/.cache/huggingface/datasets/knkarthick___csv/knkarthick--dialogsum-cd36827d3490488d/0.0.0/6954658bab30a358235fa864b05cf819af0e179325c740e4bc853bcc7ec513e1...


Downloading data files:   0%|          | 0/3 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/11.3M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/1.35M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/442k [00:00<?, ?B/s]

Extracting data files:   0%|          | 0/3 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

Generating test split: 0 examples [00:00, ? examples/s]

Generating validation split: 0 examples [00:00, ? examples/s]

Dataset csv downloaded and prepared to /root/.cache/huggingface/datasets/knkarthick___csv/knkarthick--dialogsum-cd36827d3490488d/0.0.0/6954658bab30a358235fa864b05cf819af0e179325c740e4bc853bcc7ec513e1. Subsequent calls will reuse this data.


  0%|          | 0/3 [00:00<?, ?it/s]

DatasetDict({
    train: Dataset({
        features: ['id', 'dialogue', 'summary', 'topic'],
        num_rows: 12460
    })
    test: Dataset({
        features: ['id', 'dialogue', 'summary', 'topic'],
        num_rows: 1500
    })
    validation: Dataset({
        features: ['id', 'dialogue', 'summary', 'topic'],
        num_rows: 500
    })
})

In [6]:
model_name ='t5-small' #'/kaggle/input/flan-t5/pytorch/base/4'
original_model = AutoModelForSeq2SeqLM.from_pretrained(model_name, torch_dtype=torch.bfloat16)
tokenizer = AutoTokenizer.from_pretrained(model_name)

config.json:   0%|          | 0.00/1.21k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/242M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/2.32k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.39M [00:00<?, ?B/s]

In [7]:
from peft import LoraConfig, get_peft_model, TaskType

lora_config = LoraConfig(
    r=32, # Rank
    lora_alpha=32,
    target_modules=["q", "v"],
    lora_dropout=0.05,
    bias="none",
    task_type=TaskType.SEQ_2_SEQ_LM #T5-small
)

In [8]:
def print_number_of_trainable_model_parameters(model):
    trainable_model_params = 0
    all_model_params = 0
    for _, param in model.named_parameters():
        all_model_params += param.numel()
        if param.requires_grad:
            trainable_model_params += param.numel()
    return f"trainable model parameters: {trainable_model_params}\nall model parameters: {all_model_params}\npercentage of trainable model parameters: {100 * trainable_model_params / all_model_params:.2f}%"

In [9]:
peft_model = get_peft_model(original_model,
                            lora_config)
print(print_number_of_trainable_model_parameters(peft_model))

trainable model parameters: 1179648
all model parameters: 61686272
percentage of trainable model parameters: 1.91%


In [10]:
def tokenize_function(example):
    start_prompt = 'Summarize the following conversation.\n\n'
    end_prompt = '\n\nSummary: '
    prompt = [start_prompt + dialogue + end_prompt for dialogue in example["dialogue"]]
    example['input_ids'] = tokenizer(prompt, padding="max_length", truncation=True, return_tensors="pt").input_ids
    example['labels'] = tokenizer(example["summary"], padding="max_length", truncation=True, return_tensors="pt").input_ids

    return example

# The dataset actually contains 3 diff splits: train, validation, test.
# The tokenize_function code is handling all data across all splits in batches.
tokenized_datasets = dataset.map(tokenize_function, batched=True)
tokenized_datasets = tokenized_datasets.remove_columns(['id', 'topic', 'dialogue', 'summary',])

Map:   0%|          | 0/12460 [00:00<?, ? examples/s]

Map:   0%|          | 0/1500 [00:00<?, ? examples/s]

Map:   0%|          | 0/500 [00:00<?, ? examples/s]

In [11]:
tokenized_datasets = tokenized_datasets.filter(lambda example, index: index % 100 == 0, with_indices=True)

Filter:   0%|          | 0/12460 [00:00<?, ? examples/s]

Filter:   0%|          | 0/1500 [00:00<?, ? examples/s]

Filter:   0%|          | 0/500 [00:00<?, ? examples/s]

In [12]:
print(f"Shapes of the datasets:")
print(f"Training: {tokenized_datasets['train'].shape}")
print(f"Validation: {tokenized_datasets['validation'].shape}")
print(f"Test: {tokenized_datasets['test'].shape}")

print(tokenized_datasets)

Shapes of the datasets:
Training: (125, 2)
Validation: (5, 2)
Test: (15, 2)
DatasetDict({
    train: Dataset({
        features: ['input_ids', 'labels'],
        num_rows: 125
    })
    test: Dataset({
        features: ['input_ids', 'labels'],
        num_rows: 15
    })
    validation: Dataset({
        features: ['input_ids', 'labels'],
        num_rows: 5
    })
})


In [17]:
output_dir = f'/content/kaggle/working/peft-dialogue-summary-training-{str(int(time.time()))}'

peft_training_args = TrainingArguments(
    output_dir=output_dir,
    auto_find_batch_size=True,
    learning_rate=1e-3, # Higher learning rate than full fine-tuning.
    num_train_epochs=1,
    logging_steps=1,
    max_steps=1
)

peft_trainer = Trainer(
    model=peft_model,
    args=peft_training_args,
    train_dataset=tokenized_datasets["train"],
)

In [19]:
peft_trainer.train()

peft_model_path="/content/kaggle/working/peft-dialogue-summary-checkpoint-local"

peft_trainer.model.save_pretrained(peft_model_path)
tokenizer.save_pretrained(peft_model_path)



Step,Training Loss
1,2.3906


('/content/kaggle/working/peft-dialogue-summary-checkpoint-local/tokenizer_config.json',
 '/content/kaggle/working/peft-dialogue-summary-checkpoint-local/special_tokens_map.json',
 '/content/kaggle/working/peft-dialogue-summary-checkpoint-local/tokenizer.json')

In [21]:
peft_model = peft_model.to('cpu')
original_model = original_model.to('cpu')

In [24]:
index = 200
dialogue = dataset['test'][index]['dialogue']
baseline_human_summary = dataset['test'][index]['summary']

prompt = f"""
Summarize the following conversation.

{dialogue}

Summary: """

input_ids = tokenizer(prompt, return_tensors="pt").input_ids

original_model_outputs = original_model.generate(input_ids=input_ids, generation_config=GenerationConfig(max_new_tokens=200, num_beams=1))
original_model_text_output = tokenizer.decode(original_model_outputs[0], skip_special_tokens=True)

peft_model_outputs = peft_model.generate(input_ids=input_ids, generation_config=GenerationConfig(max_new_tokens=200, num_beams=1))
peft_model_text_output = tokenizer.decode(peft_model_outputs[0], skip_special_tokens=True)

print("-----------------")
print(f'BASELINE HUMAN SUMMARY:\n{baseline_human_summary}')
print("-----------------")
print(f'ORIGINAL MODEL:\n{original_model_text_output}')
print("-----------------")
print(f'PEFT MODEL: {peft_model_text_output}')

-----------------
BASELINE HUMAN SUMMARY:
#Person1# teaches #Person2# how to upgrade software and hardware in #Person2#'s system.
-----------------
ORIGINAL MODEL:
:: You'd probably want to add a CD-ROM drive too, because most new software programs are coming out on Cds. #Person1#: You might also want to add a painting program to your software. It would allow you to make your own flyers and banners for advertising.
-----------------
PEFT MODEL: : You might need a faster processor, a faster processor and a faster modem. #Person1#: Then you might want to add a CD-ROM drive too, because most new software programs are coming out on Cds.


### ROGUE Score

In [27]:
dialogues = dataset['test'][0:10]['dialogue']
human_baseline_summaries = dataset['test'][0:10]['summary']

original_model_summaries = []
instruct_model_summaries = []
peft_model_summaries = []

for idx, dialogue in enumerate(dialogues):
    prompt = f"""
Summarize the following conversation.

{dialogue}

Summary: """

    input_ids = tokenizer(prompt, return_tensors="pt").input_ids

    human_baseline_text_output = human_baseline_summaries[idx]

    original_model_outputs = original_model.generate(input_ids=input_ids, generation_config=GenerationConfig(max_new_tokens=200))
    original_model_text_output = tokenizer.decode(original_model_outputs[0], skip_special_tokens=True)

    peft_model_outputs = peft_model.generate(input_ids=input_ids, generation_config=GenerationConfig(max_new_tokens=200))
    peft_model_text_output = tokenizer.decode(peft_model_outputs[0], skip_special_tokens=True)

    original_model_summaries.append(original_model_text_output)

    peft_model_summaries.append(peft_model_text_output)

zipped_summaries = list(zip(human_baseline_summaries, original_model_summaries, peft_model_summaries))

df = pd.DataFrame(zipped_summaries, columns = ['human_baseline_summaries', 'original_model_summaries', 'peft_model_summaries'])
df

Unnamed: 0,human_baseline_summaries,original_model_summaries,peft_model_summaries
0,Ms. Dawson helps #Person1# to write a memo to ...,": I'm a dictation for me.Person1#: Yes, sir. G...","a reminder. #Person2#: Yes, sir. Go ahead. #Pe..."
1,In order to prevent employees from wasting tim...,": Yes, sir. Go ahead. #Person1#: Yes. Please c...",: Yes. Yes.r. Go ahead..Person1#: Yes. Please ...
2,Ms. Dawson takes a dictation for #Person1# abo...,: Yes. I need to take a dictation for me. #Per...,":Person1#:, I need you to take a dictation for..."
3,#Person2# arrives late because of traffic jam....,: I'm not sure if you could use the public tra...,: I'm going to really miss the freedom that yo...
4,#Person2# decides to follow #Person1#'s sugges...,: I'm going to be a bit stuck in traffic. #Per...,: I'm going to quit driving to work. #Person1#...
5,#Person2# complains to #Person1# about the tra...,: I'm going to quit driving to work. #Person1#...,: I'm going to quit driving to work. #Person1#...
6,#Person1# tells Kate that Masha and Hero get d...,": What do you mean? #Person2#: Kate, you never...",": Kate, you never believe what's happened. #Pe..."
7,#Person1# tells Kate that Masha and Hero are g...,": Masha, you never believe what's happened, an...",:: What happened? #Person2#: What do you mean?...
8,#Person1# and Kate talk about the divorce betw...,":: Masha, it seems quiet and makable, no quarr...","Kate, you never believe what's happened. #Pers..."
9,#Person1# and Brian are at the birthday party ...,": Happy Birthday, Brian.: Happy Birthday, this...",": Thanks, you look great, and you look great. ..."


In [29]:
rouge = evaluate.load('rouge')

original_model_results = rouge.compute(
    predictions=original_model_summaries,
    references=human_baseline_summaries[0:len(original_model_summaries)],
    use_aggregator=True,
    use_stemmer=True,
)


peft_model_results = rouge.compute(
    predictions=peft_model_summaries,
    references=human_baseline_summaries[0:len(peft_model_summaries)],
    use_aggregator=True,
    use_stemmer=True,
)

print('ORIGINAL MODEL:')
print(original_model_results)
print('PEFT MODEL:')
print(peft_model_results)

ORIGINAL MODEL:
{'rouge1': 0.26153429187653515, 'rouge2': 0.07472947450104889, 'rougeL': 0.1857782630933899, 'rougeLsum': 0.1845791182590585}
PEFT MODEL:
{'rouge1': 0.20865789228948467, 'rouge2': 0.06902625566804671, 'rougeL': 0.1623476543458303, 'rougeLsum': 0.16141792664371646}


In [35]:
bleu = evaluate.load("bleu")

original_model_results = bleu.compute(
    predictions=original_model_summaries,
    references=human_baseline_summaries[0:len(original_model_summaries)],
    #use_aggregator=True,  # Aggregate scores across all n-grams
    smooth="epsilon"  # Apply smoothing to avoid zero scores (optional)
)

peft_model_results = bleu.compute(
    predictions=peft_model_summaries,
    references=human_baseline_summaries[0:len(peft_model_summaries)],
    #use_aggregator=True,
    smooth="epsilon"
)

print('ORIGINAL MODEL:')
print(original_model_results)
print('PEFT MODEL:')
print(peft_model_results)

ORIGINAL MODEL:
{'bleu': 0.037787525938879414, 'precisions': [0.18547140649149924, 0.06750392464678179, 0.03349282296650718, 0.004862236628849271], 'brevity_penalty': 1.0, 'length_ratio': 2.307142857142857, 'translation_length': 646, 'reference_length': 280}
PEFT MODEL:
{'bleu': 0.035726259538605726, 'precisions': [0.1735668789808917, 0.059870550161812294, 0.03125, 0.005016722408026756], 'brevity_penalty': 1.0, 'length_ratio': 2.2392857142857143, 'translation_length': 627, 'reference_length': 280}


## Different Paras

In [13]:
from peft import LoraConfig, get_peft_model, TaskType

lora_config = LoraConfig(
    r=8, # Rank
    lora_alpha=32,
    target_modules=["q", "v"],
    lora_dropout=0.05,
    bias="none",
    task_type=TaskType.SEQ_2_SEQ_LM #T5-small
)

In [14]:
peft_model = get_peft_model(original_model,
                            lora_config)
print(print_number_of_trainable_model_parameters(peft_model))

trainable model parameters: 294912
all model parameters: 60801536
percentage of trainable model parameters: 0.49%


In [15]:
output_dir = f'/content/kaggle/working/peft-dialogue-summary-training-{str(int(time.time()))}'

peft_training_args = TrainingArguments(
    output_dir=output_dir,
    auto_find_batch_size=True,
    learning_rate=1e-3, # Higher learning rate than full fine-tuning.
    num_train_epochs=1,
    logging_steps=1,
    max_steps=1
)

peft_trainer = Trainer(
    model=peft_model,
    args=peft_training_args,
    train_dataset=tokenized_datasets["train"],
)

In [16]:
peft_model = peft_model.to('cpu')
original_model = original_model.to('cpu')

In [17]:
index = 200
dialogue = dataset['test'][index]['dialogue']
baseline_human_summary = dataset['test'][index]['summary']

prompt = f"""
Summarize the following conversation.

{dialogue}

Summary: """

input_ids = tokenizer(prompt, return_tensors="pt").input_ids

original_model_outputs = original_model.generate(input_ids=input_ids, generation_config=GenerationConfig(max_new_tokens=200, num_beams=1))
original_model_text_output = tokenizer.decode(original_model_outputs[0], skip_special_tokens=True)

peft_model_outputs = peft_model.generate(input_ids=input_ids, generation_config=GenerationConfig(max_new_tokens=200, num_beams=1))
peft_model_text_output = tokenizer.decode(peft_model_outputs[0], skip_special_tokens=True)

print("-----------------")
print(f'BASELINE HUMAN SUMMARY:\n{baseline_human_summary}')
print("-----------------")
print(f'ORIGINAL MODEL:\n{original_model_text_output}')
print("-----------------")
print(f'PEFT MODEL: {peft_model_text_output}')

-----------------
BASELINE HUMAN SUMMARY:
#Person1# teaches #Person2# how to upgrade software and hardware in #Person2#'s system.
-----------------
ORIGINAL MODEL:
::: You might want to upgrade your hardware because it is pretty outdated now.: No., more memory and a faster modem.: Yes, but I'm not sure what exactly I would need.: Yes.: Yes.: You might want to add a painting program to your software. It would allow you to make up your own flyers and banners for advertising.
-----------------
PEFT MODEL: ::: You might want to upgrade your hardware because it is pretty outdated now.: No., more memory and a faster modem.: Yes, but I'm not sure what exactly I would need.: Yes.: Yes.: You might want to add a painting program to your software. It would allow you to make up your own flyers and banners for advertising.


In [19]:
dialogues = dataset['test'][0:10]['dialogue']
human_baseline_summaries = dataset['test'][0:10]['summary']

original_model_summaries = []
instruct_model_summaries = []
peft_model_summaries = []

for idx, dialogue in enumerate(dialogues):
    prompt = f"""
Summarize the following conversation.

{dialogue}

Summary: """

    input_ids = tokenizer(prompt, return_tensors="pt").input_ids

    human_baseline_text_output = human_baseline_summaries[idx]

    original_model_outputs = original_model.generate(input_ids=input_ids, generation_config=GenerationConfig(max_new_tokens=200))
    original_model_text_output = tokenizer.decode(original_model_outputs[0], skip_special_tokens=True)

    peft_model_outputs = peft_model.generate(input_ids=input_ids, generation_config=GenerationConfig(max_new_tokens=200))
    peft_model_text_output = tokenizer.decode(peft_model_outputs[0], skip_special_tokens=True)

    original_model_summaries.append(original_model_text_output)

    peft_model_summaries.append(peft_model_text_output)

zipped_summaries = list(zip(human_baseline_summaries, original_model_summaries, peft_model_summaries))

df = pd.DataFrame(zipped_summaries, columns = ['human_baseline_summaries', 'original_model_summaries', 'peft_model_summaries'])
df

Unnamed: 0,human_baseline_summaries,original_model_summaries,peft_model_summaries
0,Ms. Dawson helps #Person1# to write a memo to ...,": Yes, sir. Go ahead. #Person2#: Yes. Please g...",": Yes, sir. Go ahead. #Person2#: Yes. Please g..."
1,In order to prevent employees from wasting tim...,": Yes, sir. Go ahead. #Person2#: Yes. Please g...",": Yes, sir. Go ahead. #Person2#: Yes. Please g..."
2,Ms. Dawson takes a dictation for #Person1# abo...,": Yes, sir. Go ahead. #Person2#: Yes. Please g...",": Yes, sir. Go ahead. #Person2#: Yes. Please g..."
3,#Person2# arrives late because of traffic jam....,: I'm going to really miss having the freedom ...,: I'm going to really miss having the freedom ...
4,#Person2# decides to follow #Person1#'s sugges...,: I'm going to really miss having the freedom ...,: I'm going to really miss having the freedom ...
5,#Person2# complains to #Person1# about the tra...,: I'm going to really miss having the freedom ...,: I'm going to really miss having the freedom ...
6,#Person1# tells Kate that Masha and Hero get d...,":: Kate, you never believe what's happened.: K...",":: Kate, you never believe what's happened.: K..."
7,#Person1# tells Kate that Masha and Hero are g...,":: Kate, you never believe what's happened.: K...",":: Kate, you never believe what's happened.: K..."
8,#Person1# and Kate talk about the divorce betw...,":: Kate, you never believe what's happened.: K...",":: Kate, you never believe what's happened.: K..."
9,#Person1# and Brian are at the birthday party ...,": Happy Birthday, this is for you, Brian. #Per...",": Happy Birthday, this is for you, Brian. #Per..."


In [20]:
rouge = evaluate.load('rouge')

original_model_results = rouge.compute(
    predictions=original_model_summaries,
    references=human_baseline_summaries[0:len(original_model_summaries)],
    use_aggregator=True,
    use_stemmer=True,
)


peft_model_results = rouge.compute(
    predictions=peft_model_summaries,
    references=human_baseline_summaries[0:len(peft_model_summaries)],
    use_aggregator=True,
    use_stemmer=True,
)

print('ORIGINAL MODEL:')
print(original_model_results)
print('PEFT MODEL:')
print(peft_model_results)

ORIGINAL MODEL:
{'rouge1': 0.12895856281993134, 'rouge2': 0.011802488960256924, 'rougeL': 0.11537622734761191, 'rougeLsum': 0.11676047223516275}
PEFT MODEL:
{'rouge1': 0.12895856281993134, 'rouge2': 0.011802488960256924, 'rougeL': 0.11537622734761191, 'rougeLsum': 0.11676047223516275}


In [21]:
bleu = evaluate.load("bleu")

original_model_results = bleu.compute(
    predictions=original_model_summaries,
    references=human_baseline_summaries[0:len(original_model_summaries)],
    #use_aggregator=True,  # Aggregate scores across all n-grams
    smooth="epsilon"  # Apply smoothing to avoid zero scores (optional)
)

peft_model_results = bleu.compute(
    predictions=peft_model_summaries,
    references=human_baseline_summaries[0:len(peft_model_summaries)],
    #use_aggregator=True,
    smooth="epsilon"
)

print('ORIGINAL MODEL:')
print(original_model_results)
print('PEFT MODEL:')
print(peft_model_results)

Downloading builder script:   0%|          | 0.00/5.94k [00:00<?, ?B/s]

Downloading extra modules:   0%|          | 0.00/1.55k [00:00<?, ?B/s]

Downloading extra modules:   0%|          | 0.00/3.34k [00:00<?, ?B/s]

ORIGINAL MODEL:
{'bleu': 0.025273913229042037, 'precisions': [0.16247139588100687, 0.03044496487119438, 0.016786570743405275, 0.004914004914004914], 'brevity_penalty': 1.0, 'length_ratio': 1.5571428571428572, 'translation_length': 436, 'reference_length': 280}
PEFT MODEL:
{'bleu': 0.025273913229042037, 'precisions': [0.16247139588100687, 0.03044496487119438, 0.016786570743405275, 0.004914004914004914], 'brevity_penalty': 1.0, 'length_ratio': 1.5571428571428572, 'translation_length': 436, 'reference_length': 280}
