# Install Requirements

In [None]:
# !pip install -q -U pip --progress-bar off
# # !pip install -q bitsandbytes==0.41.2 --progress-bar off
# !pip install -q torch==2.0.1 --progress-bar off
# !pip install -q -U git+https://github.com/lvwerra/trl.git --progress-bar off
# !pip install -q -U bitsandbytes --progress-bar off
# !pip install -q -U git+https://github.com/huggingface/transformers@de9255de27abfcae4a1f816b904915f0b1e23cd9 --progress-bar off
# !pip install -q -U git+https://github.com/huggingface/peft.git --progress-bar off
# !pip install -q -U git+https://github.com/huggingface/accelerate.git --progress-bar off
# !pip install -q loralib==0.1.1 --progress-bar off
# !pip install -q einops==0.6.1 --progress-bar off
# !pip install -q -U datasets --progress-bar off
# !pip install -q -U wandb --progress-bar off
# !pip install -q evaluate==0.4.0 --progress-bar off
# !pip install -q rouge_score==0.1.2 --progress-bar off

# Import Libraries and Helper Functions

In [None]:
from datasets import load_dataset
from transformers import (
    pipeline,
    AutoTokenizer,
    AutoModelForSequenceClassification,
    AutoModelForSeq2SeqLM,
    GenerationConfig,
    TrainingArguments,
    Trainer,
    BitsAndBytesConfig
)
from peft import (PeftModel,
    LoraConfig,
    PeftConfig,
    PeftModel,
    get_peft_model,
    prepare_model_for_kbit_training,
    TaskType
)

# trl: Transformer Reinforcement Learning library
from trl import (
    PPOTrainer,
    PPOConfig,
    AutoModelForSeq2SeqLMWithValueHead
)
from trl import create_reference_model
from trl.core import LengthSampler

import torch
from datetime import datetime
import evaluate

import numpy as np
import pandas as pd

# tqdm library makes the loops show a smart progress meter.
from tqdm import tqdm
tqdm.pandas()

# Load Dataset

In [None]:
# Hugging Face Hub identifier of the DialogSum dataset.
huggingface_dataset_name = "knkarthick/dialogsum"

# Load the dataset; the result holds the train, validation and test splits.
dataset = load_dataset(huggingface_dataset_name)

# Bare last expression: the notebook renders the splits, features and row counts.
dataset

DatasetDict({
    train: Dataset({
        features: ['id', 'dialogue', 'summary', 'topic'],
        num_rows: 12460
    })
    validation: Dataset({
        features: ['id', 'dialogue', 'summary', 'topic'],
        num_rows: 500
    })
    test: Dataset({
        features: ['id', 'dialogue', 'summary', 'topic'],
        num_rows: 1500
    })
})

In [None]:
# Index the row first, then the column: only one example is read instead of
# materialising the whole 'id' column to show a single value.
dataset['train'][0]['id']

'train_0'

In [None]:
# Index the row first, then the column: only one example is read instead of
# materialising the whole 'dialogue' column to show a single value.
dataset['train'][0]['dialogue']

"#Person1#: Hi, Mr. Smith. I'm Doctor Hawkins. Why are you here today?\n#Person2#: I found it would be a good idea to get a check-up.\n#Person1#: Yes, well, you haven't had one for 5 years. You should have one every year.\n#Person2#: I know. I figure as long as there is nothing wrong, why go see the doctor?\n#Person1#: Well, the best way to avoid serious illnesses is to find out about them early. So try to come at least once a year for your own good.\n#Person2#: Ok.\n#Person1#: Let me see here. Your eyes and ears look fine. Take a deep breath, please. Do you smoke, Mr. Smith?\n#Person2#: Yes.\n#Person1#: Smoking is the leading cause of lung cancer and heart disease, you know. You really should quit.\n#Person2#: I've tried hundreds of times, but I just can't seem to kick the habit.\n#Person1#: Well, we have classes and some medications that might help. I'll give you more information before you leave.\n#Person2#: Ok, thanks doctor."

In [None]:
# Index the row first, then the column: only one example is read instead of
# materialising the whole 'summary' column to show a single value.
dataset['train'][0]['summary']

"Mr. Smith's getting a check-up, and Doctor Hawkins advises him to have one every year. Hawkins'll give some information about their classes and medications to help Mr. Smith quit smoking."

In [None]:
# Index the row first, then the column: only one example is read instead of
# materialising the whole 'topic' column to show a single value.
dataset['train'][0]['topic']

'get a check-up'

# Load Model

## Original Model

In [None]:
# Checkpoint shared by the baseline model and its tokenizer.
model_name = 'google/flan-t5-base'

# Tokenizer that belongs to the checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Baseline (not fine-tuned) seq2seq model, loaded with bfloat16 weights and
# automatic device placement.
original_model = AutoModelForSeq2SeqLM.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

In [None]:
# Display the module tree of the baseline model.
original_model

T5ForConditionalGeneration(
  (shared): Embedding(32128, 768)
  (encoder): T5Stack(
    (embed_tokens): Embedding(32128, 768)
    (block): ModuleList(
      (0): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=768, out_features=768, bias=False)
              (k): Linear(in_features=768, out_features=768, bias=False)
              (v): Linear(in_features=768, out_features=768, bias=False)
              (o): Linear(in_features=768, out_features=768, bias=False)
              (relative_attention_bias): Embedding(32, 12)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerFF(
            (DenseReluDense): T5DenseGatedActDense(
              (wi_0): Linear(in_features=768, out_features=2048, bias=False)
              (wi_1): Linear(in_features=768, out_features=2048, bias=False)
              (wo):

In [None]:
def print_number_of_trainable_model_parameters(model):
    """Summarize how many of a model's parameters are trainable.

    Args:
        model: Any object exposing ``named_parameters()`` that yields
            ``(name, parameter)`` pairs, where each parameter provides
            ``numel()`` and ``requires_grad``.

    Returns:
        A three-line string with the trainable parameter count, the total
        parameter count and the trainable share as a percentage. Despite the
        function's name nothing is printed here; the caller prints the result.
    """
    trainable_model_params = 0
    all_model_params = 0
    for _, param in model.named_parameters():
        count = param.numel()
        all_model_params += count
        if param.requires_grad:
            trainable_model_params += count

    # A model without parameters would otherwise raise ZeroDivisionError.
    if all_model_params:
        percentage = 100 * trainable_model_params / all_model_params
    else:
        percentage = 0.0

    return (
        f"trainable model parameters: {trainable_model_params}\n"
        f"all model parameters: {all_model_params}\n"
        f"percentage of trainable model parameters: {percentage:.2f}%"
    )

# Report the parameter counts of the baseline model.
print(print_number_of_trainable_model_parameters(original_model))

trainable model parameters: 247577856
all model parameters: 247577856
percentage of trainable model parameters: 100.00%


## PEFT Model

In [None]:
model_name = 'google/flan-t5-base'

# 4-bit NF4 quantization with double quantization; bfloat16 is the compute dtype.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# Tokenizer and quantized base model both come from the same checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_name)
original_quantized_model = AutoModelForSeq2SeqLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
)

In [None]:
# Turn on gradient checkpointing for the quantized base model.
original_quantized_model.gradient_checkpointing_enable()
# Let PEFT prepare the quantized model for k-bit training.
# NOTE(review): prepare_model_for_kbit_training may already enable gradient
# checkpointing by default, which would make the call above redundant — confirm
# against the installed peft version.
original_quantized_model = prepare_model_for_kbit_training(original_quantized_model)

In [None]:
# LoRA adapter settings used for fine-tuning.
lora_config = LoraConfig(
    r=32, # Rank
    lora_alpha=32,
    # Adapters are attached to the modules named "q" and "v"
    # (the attention projections listed in the printed model above).
    target_modules=["q", "v"],
    lora_dropout=0.05,
    bias="none",
    task_type=TaskType.SEQ_2_SEQ_LM # FLAN-T5
)

In [None]:
# Wrap the quantized base model with the adapters described by lora_config.
original_peft_model = get_peft_model(original_quantized_model,
                            lora_config)
# Report how many parameters are trainable after wrapping.
print(print_number_of_trainable_model_parameters(original_peft_model))

trainable model parameters: 3538944
all model parameters: 170900736
percentage of trainable model parameters: 2.07%


# Zero Shot Sanity Check

In [None]:
# Display the tokenizer's configuration (vocabulary size, max length, special tokens).
tokenizer

T5TokenizerFast(name_or_path='google/flan-t5-base', vocab_size=32100, model_max_length=512, is_fast=True, padding_side='right', truncation_side='right', special_tokens={'eos_token': '</s>', 'unk_token': '<unk>', 'pad_token': '<pad>', 'additional_special_tokens': ['<extra_id_0>', '<extra_id_1>', '<extra_id_2>', '<extra_id_3>', '<extra_id_4>', '<extra_id_5>', '<extra_id_6>', '<extra_id_7>', '<extra_id_8>', '<extra_id_9>', '<extra_id_10>', '<extra_id_11>', '<extra_id_12>', '<extra_id_13>', '<extra_id_14>', '<extra_id_15>', '<extra_id_16>', '<extra_id_17>', '<extra_id_18>', '<extra_id_19>', '<extra_id_20>', '<extra_id_21>', '<extra_id_22>', '<extra_id_23>', '<extra_id_24>', '<extra_id_25>', '<extra_id_26>', '<extra_id_27>', '<extra_id_28>', '<extra_id_29>', '<extra_id_30>', '<extra_id_31>', '<extra_id_32>', '<extra_id_33>', '<extra_id_34>', '<extra_id_35>', '<extra_id_36>', '<extra_id_37>', '<extra_id_38>', '<extra_id_39>', '<extra_id_40>', '<extra_id_41>', '<extra_id_42>', '<extra_id_43>'

In [None]:
# Test-set example used for the zero-shot sanity check.
index = 200

dialogue = dataset['test'][index]['dialogue']
summary = dataset['test'][index]['summary']

prompt = f"""
Summarize the following conversation.

{dialogue}

Summary:
"""

# Send the inputs to the device the model was actually placed on (it is loaded
# with device_map="auto") instead of assuming "cuda:0" exists.
inputs = tokenizer(prompt, return_tensors='pt').to(device=original_model.device)
output = tokenizer.decode(
    original_model.generate(
        inputs["input_ids"],
        max_new_tokens=200,
    )[0],
    skip_special_tokens=True
)

# 99-dash separator (same length as the previous join-based construction);
# later cells reuse this variable.
dash_line = '-' * 99
print(dash_line)
print(f'INPUT PROMPT:\n{prompt}')
print(dash_line)
print(f'BASELINE HUMAN SUMMARY:\n{summary}\n')
print(dash_line)
print(f'MODEL GENERATION - ZERO SHOT:\n{output}')

---------------------------------------------------------------------------------------------------
INPUT PROMPT:

Summarize the following conversation.

#Person1#: Have you considered upgrading your system?
#Person2#: Yes, but I'm not sure what exactly I would need.
#Person1#: You could consider adding a painting program to your software. It would allow you to make up your own flyers and banners for advertising.
#Person2#: That would be a definite bonus.
#Person1#: You might also want to upgrade your hardware because it is pretty outdated now.
#Person2#: How can we do that?
#Person1#: You'd probably need a faster processor, to begin with. And you also need a more powerful hard disc, more memory and a faster modem. Do you have a CD-ROM drive?
#Person2#: No.
#Person1#: Then you might want to add a CD-ROM drive too, because most new software programs are coming out on Cds.
#Person2#: That sounds great. Thanks.

Summary:

-------------------------------------------------------------------

# Data Preprocessing

## Instruct Tuning the Dataset

In [None]:
def tokenize_function(example):
    """Tokenize a batch of dialogues and summaries for seq2seq fine-tuning.

    Args:
        example: A batch as passed by ``dataset.map(..., batched=True)``; its
            ``"dialogue"`` and ``"summary"`` entries are sequences of strings.

    Returns:
        The same batch with two added entries: ``input_ids`` (the instruction
        prompt built around each dialogue) and ``labels`` (the tokenized
        summary). Both are padded to the tokenizer's maximum length and
        truncated. Padding positions in ``labels`` are set to -100 so that
        they are excluded from the training loss.
    """
    start_prompt = 'Summarize the following conversation.\n\n'
    end_prompt = '\n\nSummary: '
    prompt = [start_prompt + dialogue + end_prompt for dialogue in example["dialogue"]]
    example['input_ids'] = tokenizer(prompt, padding="max_length", truncation=True, return_tensors="pt").input_ids

    labels = tokenizer(example["summary"], padding="max_length", truncation=True, return_tensors="pt").input_ids
    # Without this mask the loss is also computed on the padding tokens that
    # fill most of each max_length label row; -100 is the ignored label index.
    labels[labels == tokenizer.pad_token_id] = -100
    example['labels'] = labels

    return example

# The dataset holds three splits (train, validation, test); map() applies
# tokenize_function to each of them in batches. The raw columns are dropped
# afterwards so that only the tokenized fields remain.
raw_columns = ['id', 'topic', 'dialogue', 'summary']
tokenized_datasets = dataset.map(tokenize_function, batched=True).remove_columns(raw_columns)

Map:   0%|          | 0/12460 [00:00<?, ? examples/s]

In [None]:
# Print the shape of every tokenized split, then the full dataset summary.
print("Shapes of the datasets:")
for label, split in (("Training", "train"), ("Validation", "validation"), ("Test", "test")):
    print(f"{label}: {tokenized_datasets[split].shape}")

print(tokenized_datasets)

Shapes of the datasets:
Training: (12460, 2)
Validation: (500, 2)
Test: (1500, 2)
DatasetDict({
    train: Dataset({
        features: ['input_ids', 'labels'],
        num_rows: 12460
    })
    validation: Dataset({
        features: ['input_ids', 'labels'],
        num_rows: 500
    })
    test: Dataset({
        features: ['input_ids', 'labels'],
        num_rows: 1500
    })
})


In [None]:
# tokenized_datasets = tokenized_datasets.filter(lambda example, index: index % 100 == 0, with_indices=True)

In [None]:
# print(f"Shapes of the datasets:")
# print(f"Training: {tokenized_datasets['train'].shape}")
# print(f"Validation: {tokenized_datasets['validation'].shape}")
# print(f"Test: {tokenized_datasets['test'].shape}")

# print(tokenized_datasets)

# Training

In [None]:
# Naming and directory layout for this fine-tuning run.
base_model_name = "flan-t5"
project = "dialogsum-peft"
run_name = f"{base_model_name}-{project}"

base_dir = "./experiments/"
output_dir = f"{base_dir}history/{run_name}"  # handed to TrainingArguments
model_dir = f"{base_dir}{run_name}"  # where the trained adapter is saved

In [None]:
# try:
#     import shutil
#     shutil.rmtree(output_dir)
# except:
#     pass

In [None]:
# Hyperparameters and bookkeeping options for the fine-tuning run.
training_args = TrainingArguments(
    output_dir=output_dir,
    # NOTE(review): this TrainingArguments field does not appear to be consumed
    # by Trainer itself; resuming is normally requested through
    # trainer.train(resume_from_checkpoint=...) — confirm.
    resume_from_checkpoint=True,
    auto_find_batch_size=True,
    logging_steps=1,
    max_steps=-1,
    num_train_epochs=1,
    learning_rate=1e-3,
    # fp16=True,
    # fp16_full_eval=True,
    weight_decay=0.01,
    # Timestamped so repeated runs get distinct run names.
    run_name=f"{run_name}-{datetime.now().strftime('%Y-%m-%d-%H-%M')}",
    hub_private_repo=True,
    push_to_hub=True
)

# NOTE(review): no evaluation schedule is configured above, so eval_dataset is
# presumably only used if trainer.evaluate() is called explicitly — confirm.
trainer = Trainer(
    model=original_peft_model,
    args=training_args,
    train_dataset=tokenized_datasets['train'],
    eval_dataset=tokenized_datasets['validation']
)
original_peft_model.config.use_cache = False  # silence the warnings. Please re-enable for inference!

For more details, please read https://huggingface.co/docs/huggingface_hub/concepts/git_vs_http.
Cloning https://huggingface.co/ghiffaryr/flan-t5-dialogsum-peft into local empty directory.


Download file adapter_model.safetensors:   0%|          | 8.00k/13.5M [00:00<?, ?B/s]

Clean file adapter_model.safetensors:   0%|          | 1.00k/13.5M [00:00<?, ?B/s]

In [None]:
# Run the fine-tuning loop; the returned TrainOutput is displayed as the cell result.
trainer.train()

[34m[1mwandb[0m: Currently logged in as: [33mfata-ghiffaryr[0m ([33mfata-organa[0m). Use [1m`wandb login --relogin`[0m to force relogin


`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...


Step,Training Loss
1,49.8539
2,47.4283
3,39.5534
4,37.253
5,32.3372
6,28.9527
7,26.3835
8,23.9999
9,20.469
10,17.5401


TrainOutput(global_step=1558, training_loss=0.4293429307443531, metrics={'train_runtime': 2033.1753, 'train_samples_per_second': 6.128, 'train_steps_per_second': 0.766, 'total_flos': 5597097288007680.0, 'train_loss': 0.4293429307443531, 'epoch': 1.0})

In [None]:
# Save the fine-tuned PEFT model to model_dir; the evaluation cells load it from there.
original_peft_model.save_pretrained(model_dir)

In [None]:
# Upload the fine-tuned PEFT model to a private Hub repository named "<user>/<run_name>".
user = "ghiffaryr"
repo = f"{user}/{run_name}"
original_peft_model.push_to_hub(repo, private=True)

adapter_model.safetensors:   0%|          | 0.00/14.2M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/ghiffaryr/flan-t5-dialogsum-peft/commit/82909f64a821de9248dbf90cf91dec2aa2113252', commit_message='Upload model', commit_description='', oid='82909f64a821de9248dbf90cf91dec2aa2113252', pr_url=None, pr_revision=None, pr_num=None)

# Evaluation

## Load Fine-Tuned Model

In [None]:
# NOTE(review): bnb_config is built here but never passed to from_pretrained
# below, so the base model is loaded in bfloat16 without quantization —
# confirm that this is intended.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# The saved adapter config names the base checkpoint to load.
config = PeftConfig.from_pretrained(model_dir)
base_checkpoint = config.base_model_name_or_path

tokenizer = AutoTokenizer.from_pretrained(base_checkpoint)
instruct_model = AutoModelForSeq2SeqLM.from_pretrained(
    base_checkpoint,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

# Attach the saved adapter on top of the base model, not trainable.
instruct_peft_model = PeftModel.from_pretrained(
    instruct_model,
    model_dir,  # or repo if online
    is_trainable=False,
)

In [None]:
# Display the module tree of the fine-tuned model, including the LoRA layers.
instruct_peft_model

PeftModelForSeq2SeqLM(
  (base_model): LoraModel(
    (model): T5ForConditionalGeneration(
      (shared): Embedding(32128, 768)
      (encoder): T5Stack(
        (embed_tokens): Embedding(32128, 768)
        (block): ModuleList(
          (0): T5Block(
            (layer): ModuleList(
              (0): T5LayerSelfAttention(
                (SelfAttention): T5Attention(
                  (q): lora.Linear(
                    (base_layer): Linear(in_features=768, out_features=768, bias=False)
                    (lora_dropout): ModuleDict(
                      (default): Dropout(p=0.05, inplace=False)
                    )
                    (lora_A): ModuleDict(
                      (default): Linear(in_features=768, out_features=32, bias=False)
                    )
                    (lora_B): ModuleDict(
                      (default): Linear(in_features=32, out_features=768, bias=False)
                    )
                    (lora_embedding_A): ParameterDict()
            

## Qualitative

In [None]:
# Show the prompt left over from the zero-shot sanity check cell.
print(prompt)


Summarize the following conversation.

#Person1#: Have you considered upgrading your system?
#Person2#: Yes, but I'm not sure what exactly I would need.
#Person1#: You could consider adding a painting program to your software. It would allow you to make up your own flyers and banners for advertising.
#Person2#: That would be a definite bonus.
#Person1#: You might also want to upgrade your hardware because it is pretty outdated now.
#Person2#: How can we do that?
#Person1#: You'd probably need a faster processor, to begin with. And you also need a more powerful hard disc, more memory and a faster modem. Do you have a CD-ROM drive?
#Person2#: No.
#Person1#: Then you might want to add a CD-ROM drive too, because most new software programs are coming out on Cds.
#Person2#: That sounds great. Thanks.

Summary:



In [None]:
# Test-set example used for the side-by-side comparison.
index = 200
dialogue = dataset['test'][index]['dialogue']
human_baseline_summary = dataset['test'][index]['summary']

prompt = f"""
Summarize the following conversation.

{dialogue}

Summary:
"""

# Follow the device the baseline model was placed on (device_map="auto")
# rather than hard-coding "cuda:0". Both models are loaded the same way, so
# they are assumed to share that device.
input_ids = tokenizer(prompt, return_tensors="pt").to(device=original_model.device).input_ids

# Both models use identical decoding settings, so build them once.
qualitative_generation_config = GenerationConfig(max_new_tokens=200, num_beams=1)

original_model_outputs = original_model.generate(input_ids=input_ids,
                                                 generation_config=qualitative_generation_config)
original_model_text_output = tokenizer.decode(original_model_outputs[0], skip_special_tokens=True)

instruct_peft_model_outputs = instruct_peft_model.generate(input_ids=input_ids,
                                                           generation_config=qualitative_generation_config)
instruct_peft_model_text_output = tokenizer.decode(instruct_peft_model_outputs[0], skip_special_tokens=True)

# dash_line is defined in the zero-shot sanity check cell.
print(dash_line)
print(f'BASELINE HUMAN SUMMARY:\n{human_baseline_summary}')
print(dash_line)
print(f'ORIGINAL MODEL:\n{original_model_text_output}')
print(dash_line)
print(f'INSTRUCT MODEL:\n{instruct_peft_model_text_output}')

---------------------------------------------------------------------------------------------------
BASELINE HUMAN SUMMARY:
#Person1# teaches #Person2# how to upgrade software and hardware in #Person2#'s system.
---------------------------------------------------------------------------------------------------
ORIGINAL MODEL:
#Person1#: I'm thinking of upgrading my computer.
---------------------------------------------------------------------------------------------------
INSTRUCT MODEL:
#Person2# suggests adding a painting program to #Person2#'s software. #Person1# suggests adding a CD-ROM drive and a CD-ROM drive.


## Quantitative (ROUGE Metric)

In [None]:
# Load the ROUGE metric used for the quantitative comparison.
rouge = evaluate.load('rouge')

In [None]:
# dialogues = dataset['test'][0:10]['dialogue']
# human_baseline_summaries = dataset['test'][0:10]['summary']
dialogues = dataset['test']['dialogue']
human_baseline_summaries = dataset['test']['summary']

original_model_summaries = []
instruct_peft_model_summaries = []

# Decoding settings are the same for every call, so build them once instead
# of twice per dialogue.
rouge_generation_config = GenerationConfig(max_new_tokens=200)

# Generation over the whole test split is slow; tqdm shows progress.
for dialogue in tqdm(dialogues, desc="Generating summaries"):
    prompt = f"""
Summarize the following conversation.

{dialogue}

Summary: """
    # Follow the device the baseline model was placed on (device_map="auto")
    # rather than hard-coding "cuda:0".
    input_ids = tokenizer(prompt, return_tensors="pt").to(device=original_model.device).input_ids

    original_model_outputs = original_model.generate(input_ids=input_ids, generation_config=rouge_generation_config)
    original_model_text_output = tokenizer.decode(original_model_outputs[0], skip_special_tokens=True)
    original_model_summaries.append(original_model_text_output)

    instruct_peft_model_outputs = instruct_peft_model.generate(input_ids=input_ids, generation_config=rouge_generation_config)
    instruct_peft_model_text_output = tokenizer.decode(instruct_peft_model_outputs[0], skip_special_tokens=True)
    instruct_peft_model_summaries.append(instruct_peft_model_text_output)

# One row per test example: human reference next to both model outputs.
zipped_summaries = list(zip(human_baseline_summaries, original_model_summaries, instruct_peft_model_summaries))

df = pd.DataFrame(zipped_summaries, columns = ['human_baseline_summaries', 'original_model_summaries', 'instruct_peft_model_summaries'])
df.head()

Token indices sequence length is longer than the specified maximum sequence length for this model (1028 > 512). Running this sequence through the model will result in indexing errors


Unnamed: 0,human_baseline_summaries,original_model_summaries,instruct_peft_model_summaries
0,Ms. Dawson helps #Person1# to write a memo to ...,#Person1#: I need to take a dictation for you.,Ms. Dawson asks Ms. Dawson to take a dictation...
1,In order to prevent employees from wasting tim...,#Person1#: I need to take a dictation for you.,Ms. Dawson asks Ms. Dawson to take a dictation...
2,Ms. Dawson takes a dictation for #Person1# abo...,#Person1#: I need to take a dictation for you.,Ms. Dawson asks Ms. Dawson to take a dictation...
3,#Person2# arrives late because of traffic jam....,The traffic jam at the Carrefour intersection ...,#Person2# got stuck in traffic again. #Person1...
4,#Person2# decides to follow #Person1#'s sugges...,The traffic jam at the Carrefour intersection ...,#Person2# got stuck in traffic again. #Person1...


In [None]:
def _rouge_against_references(predictions):
    """Score predictions against the equally long prefix of the human summaries."""
    return rouge.compute(
        predictions=predictions,
        references=human_baseline_summaries[0:len(predictions)],
        use_aggregator=True,
        use_stemmer=True,
    )


original_model_results = _rouge_against_references(original_model_summaries)
instruct_peft_model_results = _rouge_against_references(instruct_peft_model_summaries)

# Print each model's aggregated scores under its heading.
for heading, results in (
    ('ORIGINAL MODEL:', original_model_results),
    ('INSTRUCT MODEL:', instruct_peft_model_results),
):
    print(heading)
    print(results)

ORIGINAL MODEL:
{'rouge1': 0.23312638231636737, 'rouge2': 0.07139135271676324, 'rougeL': 0.20175262872880692, 'rougeLsum': 0.20164635006932863}
INSTRUCT MODEL:
{'rouge1': 0.39307833239052475, 'rouge2': 0.15069397780331634, 'rougeL': 0.3123871849084561, 'rougeLsum': 0.3120152413955535}


In [None]:
print("Absolute percentage improvement of INSTRUCT MODEL over ORIGINAL MODEL")

# Difference of the two score vectors, taken in dictionary order.
instruct_scores = np.array(list(instruct_peft_model_results.values()))
original_scores = np.array(list(original_model_results.values()))
improvement = instruct_scores - original_scores

# Each difference is printed multiplied by 100, i.e. in percentage points.
for key, value in zip(instruct_peft_model_results, improvement):
    print(f'{key}: {value*100:.2f}%')

Absolute percentage improvement of INSTRUCT MODEL over ORIGINAL MODEL
rouge1: 16.00%
rouge2: 7.93%
rougeL: 11.06%
rougeLsum: 11.04%


# Deployment Test

In [None]:
# Same naming scheme as the training section, repeated here so that the
# deployment cells can locate the saved adapter.
base_model_name = "flan-t5"
project = "dialogsum-peft"
run_name = f"{base_model_name}-{project}"

base_dir = "./experiments/"
output_dir = f"{base_dir}history/{run_name}"
model_dir = f"{base_dir}{run_name}"  # directory PeftConfig/PeftModel load from

In [None]:
# NOTE(review): bnb_config is built here but never passed to from_pretrained
# below, so the base model is loaded in bfloat16 without quantization —
# confirm that this is intended.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# The saved adapter config names the base checkpoint to load.
config = PeftConfig.from_pretrained(model_dir)
base_checkpoint = config.base_model_name_or_path

tokenizer = AutoTokenizer.from_pretrained(base_checkpoint)
instruct_model = AutoModelForSeq2SeqLM.from_pretrained(
    base_checkpoint,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

# Attach the saved adapter on top of the base model, not trainable.
instruct_peft_model = PeftModel.from_pretrained(
    instruct_model,
    model_dir,  # or repo if online
    is_trainable=False,
)

In [None]:
def instruct_input(dialogue):
    """Wrap a dialogue in the summarization instruction prompt.

    The prompt is assembled from the same prefix and suffix that
    ``tokenize_function`` uses for the fine-tuning data. An indented
    triple-quoted template would leak the function body's indentation into
    the prompt lines, so the pieces are concatenated explicitly instead.

    Args:
        dialogue: Conversation text to summarize.

    Returns:
        The prompt string to pass to the tokenizer.
    """
    start_prompt = 'Summarize the following conversation.\n\n'
    end_prompt = '\n\nSummary: '
    return start_prompt + dialogue + end_prompt

In [None]:
# Hand-written sample conversation for the deployment test; speakers are
# named rather than tagged as #Person1#/#Person2#.
dialogue = """
Ghiffary: Have you considered upgrading your system?
Masashi: Yes, but I'm not sure what exactly I would need.
Ghiffary: You could consider adding a painting program to your software. It would allow you to make up your own flyers and banners for advertising.
Masashi: That would be a definite bonus.
Ghiffary: You might also want to upgrade your hardware because it is pretty outdated now.
Masashi: How can we do that?
Ghiffary: You'd probably need a faster processor, to begin with. And you also need a more powerful hard disc, more memory and a faster modem. Do you have a CD-ROM drive?
Masashi: No.
Ghiffary: Then you might want to add a CD-ROM drive too, because most new software programs are coming out on Cds.
Masashi: That sounds great. Thanks.
"""

In [None]:
# Build the prompt for the sample dialogue.
# NOTE(review): the name `input` shadows Python's builtin input() for the rest
# of the kernel session; the next cell reads this variable.
input = instruct_input(dialogue)

In [None]:
# Send the inputs to the device the loaded model actually lives on
# (device_map="auto") instead of assuming "cuda:0" exists.
input_ids = tokenizer(input, return_tensors="pt").to(device=instruct_model.device).input_ids

instruct_peft_model_outputs = instruct_peft_model.generate(input_ids=input_ids,
                                                 generation_config=GenerationConfig(max_new_tokens=200, num_beams=1))
instruct_peft_model_text_output = tokenizer.decode(instruct_peft_model_outputs[0], skip_special_tokens=True)
print(instruct_peft_model_text_output)

Masashi wants to upgrade his system and his hardware. Ghiffary suggests adding a painting program to his software and adding a CD-ROM drive.
