## Prompt Tuning 

In [1]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from peft import get_peft_model, PromptTuningConfig, PromptTuningInit, TaskType
import torch
import prompt_tuning
import os
import matplotlib.pyplot as plt

  from .autonotebook import tqdm as notebook_tqdm


### Prepare Model Config

In [2]:
# Disable parallelism inside the Hugging Face tokenizers library.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# Use the GPU selected by `cuda_id` when CUDA is available, otherwise fall back to the CPU.
cuda_id = ':6'
device = torch.device(f"cuda{cuda_id}" if torch.cuda.is_available() else "cpu")
print("Device:", device)

# Parameters for the PromptTuning configuration and the training run
model_name = 'google/flan-t5-large'
num_sub_modules = 2   # passed as num_transformer_submodules below
max_length = 128      # only referenced by the commented-out tokenizer option below
lr = 0.1
num_epochs = 10
batch_size = 8

tokenizer = AutoTokenizer.from_pretrained(model_name) #, model_max_length=max_length)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

# Text whose token embeddings initialise the virtual prompt tokens (PromptTuningInit.TEXT).
init_text = "Given the following description, find the described target word. The target word is not contained in the description:"

# The virtual tokens are shared across the two transformer submodules, so the number of
# virtual tokens per submodule is len(init_token_ids) // num_sub_modules.
# Floor division drops one token when the initialisation text has an odd token count.
init_token_ids = tokenizer(init_text)["input_ids"]
num_text_tokens = len(init_token_ids)
print(f"Number of initalization tokens: {num_text_tokens}")

num_virtual_tokens = num_text_tokens // num_sub_modules
print(f"Number of virtual tokens: {num_virtual_tokens}")

peft_config = PromptTuningConfig(
    task_type=TaskType.SEQ_2_SEQ_LM,
    prompt_tuning_init=PromptTuningInit.TEXT,
    num_virtual_tokens=num_virtual_tokens,
    num_transformer_submodules=num_sub_modules,
    inference_mode=False,
    prompt_tuning_init_text=init_text,
    tokenizer_name_or_path=model_name,
)

# Create the PEFT model instance around the base model.
peft_model = get_peft_model(model, peft_config)
print(peft_model.active_peft_config)
# print_trainable_parameters() writes its summary to stdout itself and returns None,
# so it is called directly; wrapping it in print() only added a stray "None" line.
peft_model.print_trainable_parameters()

Device: cuda:6


Number of initalization tokens: 22
Number of virtual tokens: 11
PromptTuningConfig(peft_type=<PeftType.PROMPT_TUNING: 'PROMPT_TUNING'>, auto_mapping=None, base_model_name_or_path='google/flan-t5-large', revision=None, task_type=<TaskType.SEQ_2_SEQ_LM: 'SEQ_2_SEQ_LM'>, inference_mode=False, num_virtual_tokens=11, token_dim=1024, num_transformer_submodules=2, num_attention_heads=16, num_layers=24, prompt_tuning_init=<PromptTuningInit.TEXT: 'TEXT'>, prompt_tuning_init_text='Given the following description, find the described target word. The target word is not contained in the description:', tokenizer_name_or_path='google/flan-t5-large')
trainable params: 22,528 || all params: 783,172,608 || trainable%: 0.0028765050986052873
None


### Data Preprocessing

In [3]:
# CSV holding the descriptions and their target words
# (absolute path on the original machine; adjust when running elsewhere).
data_path = "/local/js/BERT_Friends/data/coca_1000_final.csv"

# NOTE(review): the name `eval` shadows the Python builtin for the rest of the notebook.
train, eval, test, full_data = prompt_tuning.make_dataset(data_path)

# Tokenize every split. The original columns are removed so that only the fields
# produced by preprocess_seq2seq remain; caching is disabled so the map always re-runs.
raw_columns = full_data['train'].column_names
dataset_preproc = full_data.map(
    prompt_tuning.preprocess_seq2seq,
    desc="Running tokenizer on dataset",
    batched=True,
    num_proc=1,
    load_from_cache_file=False,
    remove_columns=raw_columns,
)

# One dataloader per split, all built with the batch size from the config cell.
train_dataloader, eval_dataloader, test_dataloader = prompt_tuning.make_dataloader(
    dataset_preproc, batch_size
)

Number of descriptions: 1000
Number of target words: 1000


Running tokenizer on dataset: 100%|██████████| 800/800 [00:00<00:00, 5809.20 examples/s]
Running tokenizer on dataset: 100%|██████████| 100/100 [00:00<00:00, 4337.71 examples/s]
Running tokenizer on dataset: 100%|██████████| 100/100 [00:00<00:00, 5612.61 examples/s]


### Training

In [None]:
# Train the prompt-tuned model with the epoch count, learning rate and device
# chosen in the config cell; the helper returns predictions for train and eval.
train_preds_total, eval_preds_final = prompt_tuning.training(
    train_dataloader,
    eval_dataloader,
    peft_model,
    num_epochs,
    lr,
    device,
)

### Evaluation

In [4]:
# Decoding settings for the evaluation cells below.
num_seqs = 5         # number of sequences to be returned per input
num_beams = 5        # number of beams used for beam search
max_beam_size = 10   # upper bound of k in the top-k sweep loops

In [5]:
# Evaluation at inference time for one specific k.
#
# Optional keyword arguments of eval_seq2seq:
#   file_spec=<file path>   save the predictions as a csv file
#   zero_shot=True          get the predictions of FLAN-T5 large without being fine-tuned
#   peft_model_id=<path>    path to the PEFT model bins; pass it when evaluating the
#                           prompt-tuned model

peft_model_id = "/local/js/prompt_tuning_bins/pt_20ep_01lr"
test_targets, test_preds = prompt_tuning.eval_seq2seq(
    test,
    test_dataloader,
    model.to('cpu'),
    peft_model_id=peft_model_id,
    num_beams=num_beams,
    num_seqs=num_seqs,
    device=device,
)
acc = prompt_tuning.acc(test_targets, test_preds)

Model is on GPU
Top-k correct predictions: 72


In [None]:
# Top-k accuracy of the prompt-tuned model for k = 1 .. max_beam_size.
# For each k, both the beam width and the number of returned sequences are set to k.
# Arguments are passed by keyword (as in the single-k evaluation call) so that no value
# can end up in the wrong parameter slot of eval_seq2seq.
top_k_acc_tuned = {}
for k in range(1, max_beam_size + 1):
    _, test_preds = prompt_tuning.eval_seq2seq(
        test,
        test_dataloader,
        model.to('cpu'),
        peft_model_id=peft_model_id,
        num_beams=k,
        num_seqs=k,
        device=device,
    )
    acc = prompt_tuning.acc(test['target'], test_preds)
    top_k_acc_tuned[k] = acc


In [None]:
# Top-k accuracy of the zero-shot model (no prompt tuning) for k = 1 .. max_beam_size.
# For each k, both the beam width and the number of returned sequences are set to k.
# The beam settings and the device are passed by keyword: given positionally right after
# the model, they would not line up with the prompt-tuned call, which passes
# peft_model_id in that position.
top_k_acc_zs = {}
for k in range(1, max_beam_size + 1):
    _, test_preds = prompt_tuning.eval_seq2seq(
        test,
        test_dataloader,
        model.to('cpu'),
        num_beams=k,
        num_seqs=k,
        device=device,
        zero_shot=True,
    )
    acc = prompt_tuning.acc(test['target'], test_preds)
    top_k_acc_zs[k] = acc

In [None]:
# Plot zero-shot vs. prompt-tuned top-k accuracy as a function of k.
fig, ax = plt.subplots()

# (scores per k, legend label, line colour) for each curve, drawn in this order
curves = [
    (top_k_acc_tuned, "Prompt Tuned", "blue"),
    (top_k_acc_zs, "Zero Shot", "red"),
]
for scores, label, color in curves:
    ax.plot(scores.keys(), scores.values(), label=label, color=color)

ax.set_ylabel("top-k accuracy")
ax.set_xlabel("k")
ax.set_title("k vs. top-k Accuracy FLAN-T5 large")
ax.legend()
plt.show()

### Playground for prompt-tuned model at inference

In [None]:
# Use the same GPU as the config cell, but fall back to the CPU when CUDA is not
# available so the playground also runs on machines without a GPU.
device = torch.device("cuda:6" if torch.cuda.is_available() else "cpu")

# make sure to replace the peft_model_id with the path to the folder that contains the peft model bins
peft_model_id = "/local/js/prompt_tuning_bins/pt_20ep_01lr"

# Load the prompt-tuned model and its tokenizer.
# NOTE: this rebinds the notebook-level names `model` and `tokenizer`.
model, tokenizer = prompt_tuning.get_peft_model_inference(peft_model_id)

# Assigning the result keeps the cell from echoing the whole module repr as its output
# (assumes the loaded model is a torch.nn.Module, whose .to() returns the module itself).
model = model.to(device)

In [7]:
def model_generate(sentence, num_beams=10, num_seqs=10):
    '''
    Generate candidate target words for a description with the previously
    initialized prompt-tuned model.

    Uses the notebook-level `model`, `tokenizer` and `device`.

    Args:
        sentence: Free-text description. A trailing "." is appended when it is
            missing (an empty string therefore becomes ".").
        num_beams: Number of beams used for beam search.
        num_seqs: Number of sequences to return (passed as `num_return_sequences`).

    Returns:
        A list with one decoded prediction string per returned sequence.
    '''
    # endswith() also copes with an empty string, where sentence[-1] would raise IndexError.
    if not sentence.endswith("."):
        sentence = sentence + "."

    inputs = [f"Description : {sentence} Word : "]
    print(inputs)

    model.eval()
    with torch.no_grad():
        encoded = tokenizer(inputs, return_tensors="pt")
        encoded = {k: v.to(device) for k, v in encoded.items()}
        beam_outputs = model.generate(
            input_ids=encoded['input_ids'],
            max_new_tokens=10,
            num_beams=num_beams,
            num_return_sequences=num_seqs,
            early_stopping=True,
        )

    # Decode every returned sequence; tolist() yields plain Python ints on any device.
    return [
        tokenizer.decode(beam_output.tolist(), skip_special_tokens=True)
        for beam_output in beam_outputs
    ]

In [8]:
# Try out the model's capabilities with any description: edit the text and re-run the cell.
description = 'a hairy thing growing on most of men on the upper side of their mouth'
# Last expression of the cell, so the returned list of predictions is shown as its output.
model_generate(description, num_beams=10, num_seqs=10)

['Description : a hairy thing growing on most of men on the upper side of their mouth. Word : ']


['moustache',
 'beard',
 'mouton',
 'shave',
 'mouty',
 'molar',
 'upper lip',
 'afro',
 'mout',
 'facial hair']