# Prompt Tuning Example


In [16]:
## import libraries
import creds
import pandas as pd

from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer
model_name = 'bigscience/bloomz-560m'
NUM_VIRTUAL_TOKENS = 5
NUM_EPOCHS = 2

In [17]:
## load the tokenizer and model
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token
base_model = AutoModelForCausalLM.from_pretrained(model_name,
                                                  trust_remote_code=True,
                                                  token=creds.HUGGINGFACE_TOKEN).to("cuda")

## Download the dataset to review items
## The dataset in mind would be the instructional prompting
instructional_prompt = load_dataset('tatsu-lab/alpaca', split = 'train')
instructional_prompt = instructional_prompt.map(lambda samples: tokenizer(samples['text']), batched = True)
instructional_prompt = instructional_prompt.select(range(100))

In [18]:
## create helper function for generating code with the LLM
def inference(model, inputs, max_new_tokens = 100):
    outputs = model.generate(
        input_ids=inputs['input_ids'],
        attention_mask=inputs['attention_mask'],
        max_new_tokens=max_new_tokens,
        repetition_penalty=1.5,
        early_stopping=True,
        eos_token_id=tokenizer.eos_token_id,
        num_beams=6
    )

    return outputs

def text_to_token(tokenizer, input):
    return tokenizer(input, return_tensors = 'pt')

def token_to_text(tokenizer, input):
    return tokenizer.batch_decode(input, skip_special_tokens=True)

In [19]:
## Base Model Inference

In [20]:
text_sample = pd.DataFrame(instructional_prompt)['instruction'][0]
print(text_sample)

token_to_text(tokenizer,inference(base_model, text_to_token(tokenizer, text_sample).to('cuda'), max_new_tokens=50))

Give three tips for staying healthy.


['Give three tips for staying healthy.']

In [21]:
text_sample = pd.DataFrame(instructional_prompt)['instruction'][3]
print(text_sample)

token_to_text(tokenizer,inference(base_model, text_to_token(tokenizer, text_sample).to('cuda'), max_new_tokens=50))

How can we reduce air pollution?


['How can we reduce air pollution? increase the use of renewable energy']

In [22]:
## Applying PEFT
from peft import get_peft_model, PromptTuningConfig, TaskType, PromptTuningInit

## configuring the PEFT parameters
peft_config = PromptTuningConfig(
    task_type = TaskType.CAUSAL_LM, # this makes the model generate text
    prompt_tuning_init=PromptTuningInit.RANDOM, # initialise the virtual tokens with random numbers
    num_virtual_tokens=NUM_VIRTUAL_TOKENS, # number of virtual tokens to add and train
    tokenizer_name_or_path=model_name
)

peft_model_prompt = get_peft_model(base_model, peft_config)
print(peft_model_prompt.print_trainable_parameters())

trainable params: 5,120 || all params: 559,219,712 || trainable%: 0.0009155614314253644
None


In [23]:
## prepare folder path
import os

## create directories to hold the model when they don't exist

working_dir = './peft_baseLLM'

## it is recommended to store the models seperately
output_dir_prompt = os.path.join(working_dir, 'peft_outputs_prompt')

## create the directories if they don't exist
if not os.path.exists(working_dir):
    os.mkdir(working_dir)
if not os.path.exists(output_dir_prompt):
    os.mkdir(output_dir_prompt)

In [30]:
## prepare the training config
from transformers import TrainingArguments

def create_training_arguments(path, learning_rate = 0.003, epochs = 6):
    training_args = TrainingArguments(
        output_dir=path,
        learning_rate=learning_rate,
        num_train_epochs=epochs,
        per_device_train_batch_size=4, fp16=True,
    )

In [31]:
## model fine tuning via PEFT
from transformers import Trainer, DataCollatorForLanguageModeling

training_args_prompt = create_training_arguments(output_dir_prompt, 0.003, 2)

def create_trainer(model, training_args, train_dataset):
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        data_collator=DataCollatorForLanguageModeling(tokenizer, mlm=False)
    )

    return trainer

In [32]:
## train
trainer_prompt = create_trainer(peft_model_prompt, training_args_prompt, instructional_prompt)
trainer_prompt.train()

OutOfMemoryError: CUDA out of memory. Tried to allocate 8.17 GiB. GPU 0 has a total capacity of 22.19 GiB of which 3.48 GiB is free. Process 2752694 has 1.46 GiB memory in use. Including non-PyTorch memory, this process has 17.08 GiB memory in use. Of the allocated memory 15.60 GiB is allocated by PyTorch, and 1.02 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [None]:
## train
trainer_prompt = create_trainer(peft_model_prompt, training_args_prompt, instructional_prompt)
trainer_prompt.train()



Step,Training Loss


TrainOutput(global_step=12, training_loss=3.6394952138264975, metrics={'train_runtime': 2.5738, 'train_samples_per_second': 116.557, 'train_steps_per_second': 4.662, 'total_flos': 24807401127936.0, 'train_loss': 3.6394952138264975, 'epoch': 3.0})

In [26]:
## save the model
trainer_prompt.model.save_pretrained(output_dir_prompt)

## load the model
from peft import PeftModel

loaded_model_prompt = PeftModel.from_pretrained(base_model,
                                         output_dir_prompt,
                                         #device_map='auto',
                                         is_trainable=False)

In [29]:
text_sample = pd.DataFrame(instructional_prompt)['instruction'][0]
print(text_sample)

token_to_text(tokenizer,inference(loaded_model_prompt, text_to_token(tokenizer, text_sample).to('cuda'), max_new_tokens=50))

Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


Give three tips for staying healthy.




['Give three tips for staying healthy.\n\n“We’ve got a lot of work to do,” he said. “We’ve got a lot of work to do. We’ve got a lot of work to do. We’ve got a lot of work to do']

In [30]:
text_sample = pd.DataFrame(instructional_prompt)['instruction'][1]
print(text_sample)

token_to_text(tokenizer,inference(loaded_model_prompt, text_to_token(tokenizer, text_sample).to('cuda'), max_new_tokens=50))

Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


What are the three primary colors?


['What are the three primary colors?\n\nA:\n\nThe first two colors are the same as the last one. The second color is the same as the last one. The third color is the same as the last one. The fourth color is the same as the last one']

In [31]:
text_sample = 'I want you to be a motivational speaker.'
print(text_sample)

print(token_to_text(tokenizer,inference(base_model, text_to_token(tokenizer, text_sample).to('cuda'), max_new_tokens=50)))
print(token_to_text(tokenizer,inference(loaded_model_prompt, text_to_token(tokenizer, text_sample).to('cuda'), max_new_tokens=50)))


Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


I want you to be a motivational speaker.


Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


["I want you to be a motivational speaker.\n\nA:\n\nIf you're going to be a motivational speaker, you need to know what you're talking about. If you're going to be a motivational speaker, you need to know what you're talking about. You need to know"]
['I want you to be a motivational speaker.\n\nA:\n\nYou can use the following code:\npublic static void Main(string[] args)\n{\n    StringBuilder sb = new StringBuilder();\n    for (int i = 0; i < args.length; i']


In [32]:
text_sample = 'Tell me 2 things that matter:'
print(text_sample)

print(token_to_text(tokenizer,inference(base_model, text_to_token(tokenizer, text_sample).to('cuda'), max_new_tokens=50)))
print(token_to_text(tokenizer,inference(loaded_model_prompt, text_to_token(tokenizer, text_sample).to('cuda'), max_new_tokens=50)))


Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


Tell me 2 things that matter:


Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


['Tell me 2 things that matter:\n\n1. What is the meaning of this sentence?\n\n2. What is the meaning of this sentence?\n\n3. What is the meaning of this sentence?\n\n4. What is the meaning of this sentence?\n\n']
["Tell me 2 things that matter:\n\n1.  I'm not sure if this is a good idea, but I think it's a good idea.\n2.  I'm not sure if this is a good idea, but I think it's a good idea.\n"]


In [33]:
text_sample = 'What 2 things matter most in life:'
print(text_sample)

print(token_to_text(tokenizer,inference(base_model, text_to_token(tokenizer, text_sample).to('cuda'), max_new_tokens=50)))
print(token_to_text(tokenizer,inference(loaded_model_prompt, text_to_token(tokenizer, text_sample).to('cuda'), max_new_tokens=50)))


Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


What 2 things matter most in life:


Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


['What 2 things matter most in life:\n\n1. What is the meaning of the word?\n\n2. What is the meaning of the word?\n\n3. What is the meaning of the word?\n\n4. What is the meaning of the word?\n\n']
['What 2 things matter most in life:\n\n1. If you’re looking for a job, it’d be great if you’re looking for a job that’s worth more than what you’re looking for.\n\n2. If you’re looking for a']
