In [1]:
from transformers import T5Tokenizer, T5ForConditionalGeneration
from datasets import load_dataset
from transformers import Trainer, TrainingArguments
import numpy as np
import evaluate

## FLAN T5 prompting

In [2]:
# Load the tokenizer and the seq2seq model for the FLAN-T5 small checkpoint.
model_name = "google/flan-t5-small"
tokenizer = T5Tokenizer.from_pretrained(model_name)
# NOTE(review): the device is hard-coded to CUDA index 2, so this cell needs a host
# with at least three visible GPUs — confirm, or make the index configurable.
device = "cuda:2"
model = T5ForConditionalGeneration.from_pretrained(model_name, device_map=device)

# Smoke test: send one translation prompt through generate().
input_text = "translate English to German: How old are you?"
input_ids = tokenizer(input_text, return_tensors="pt").input_ids.to(device)

outputs = model.generate(input_ids)
# decode() is called without skip_special_tokens, so special tokens show up in the printed text.
print(tokenizer.decode(outputs[0]))

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


<pad> Wie ich er bitten?</s>


## Zero-shot sentiment classification

In [3]:
# Instruction prefix that the sentiment prompts in this notebook are built from.
# NOTE(review): the string ends at the colon with no trailing whitespace, so any
# separator has to be added where the text is appended.
instruct_promt = "Please classify the sentiment of the following statement as 'positive' or 'negative':"
instruct_promt

"Please classify the sentiment of the following statement as 'positive' or 'negative':"

In [4]:
# Hand-written negative review used as the first zero-shot example.
input_text = "I hate this film. It's so boring!"
# Join with a space: plain concatenation glued the review directly onto the
# instruction's trailing colon ("...'negative':I hate this film.").
input_prompt = f"{instruct_promt} {input_text}"

In [15]:
# Tokenize the prompt and move every returned tensor to the model's device.
inputs = tokenizer(input_prompt, return_tensors="pt")
inputs = inputs.to(device)
# Pass the full encoding (input_ids and attention_mask) instead of only the ids,
# matching how generate() is called for the IMDB example later in the notebook.
outputs = model.generate(**inputs)
tokenizer.batch_decode(outputs, skip_special_tokens=True)

['negative']

### Working with IMDB

In [6]:
# Load only the test split of IMDB; this notebook evaluates and does not train.
dataset = load_dataset("imdb", split="test")
dataset = dataset.rename_column("label", "labels")  # match the column name Trainer expects
# Show the first record as a sanity check.
dataset[0]

{'text': 'I love sci-fi and am willing to put up with a lot. Sci-fi movies/TV are usually underfunded, under-appreciated and misunderstood. I tried to like this, I really did, but it is to good TV sci-fi as Babylon 5 is to Star Trek (the original). Silly prosthetics, cheap cardboard sets, stilted dialogues, CG that doesn\'t match the background, and painfully one-dimensional characters cannot be overcome with a \'sci-fi\' setting. (I\'m sure there are those of you out there who think Babylon 5 is good sci-fi TV. It\'s not. It\'s clichéd and uninspiring.) While US viewers might like emotion and character development, sci-fi is a genre that does not take itself seriously (cf. Star Trek). It may treat important issues, yet not as a serious philosophy. It\'s really difficult to care about the characters here as they are not simply foolish, just missing a spark of life. Their actions and reactions are wooden and predictable, often painful to watch. The makers of Earth KNOW it\'s rubbish as 

In [7]:
# Zero-shot prediction for the first IMDB review.
# The instruction and the review are joined with a space; plain concatenation
# ran the review straight into the instruction's trailing colon.
inputs = tokenizer(f"{instruct_promt} {dataset[0]['text']}", return_tensors="pt")
inputs = inputs.to(device)
outputs = model.generate(**inputs)
tokenizer.batch_decode(outputs, skip_special_tokens=True)

['negative']

In [12]:
# List the fields the tokenizer returned for the prompt encoded above.
inputs.keys()

dict_keys(['input_ids', 'attention_mask'])

In [13]:
# Shape of the attention mask for the single encoded prompt.
inputs['attention_mask'].shape

torch.Size([1, 382])

In [None]:
def preprocess_function(examples):
    """Tokenize a batch of examples, each prefixed with the instruction prompt.

    Args:
        examples: Batch mapping with a "text" entry holding the raw texts.

    Returns:
        Whatever the notebook-level ``tokenizer`` returns for the list of
        prompts, called with ``truncation=True`` and ``padding="max_length"``.
    """
    # Separate the instruction from the text with a space; plain concatenation
    # glued each text directly onto the instruction's trailing colon.
    prompts = [f"{instruct_promt} {text}" for text in examples["text"]]
    return tokenizer(prompts,
                     truncation=True,
                     padding="max_length")

In [None]:
# Apply preprocess_function to the dataset in batches, using 10 worker processes.
tokenized_dataset = dataset.map(preprocess_function, batched=True, num_proc=10)

In [None]:
# Check which columns the first tokenized record carries.
tokenized_dataset[0].keys()

In [None]:
# F1 metric object; compute_metrics below calls its compute() method.
metric = evaluate.load("f1")
def compute_metrics(eval_pred):
    """Score one evaluation pass with the notebook-level ``metric``.

    Args:
        eval_pred: A pair ``(predictions, labels)``. ``predictions`` is reduced
            with ``np.argmax`` along axis 1 before scoring.

    Returns:
        The result of ``metric.compute`` for the argmax predictions against
        ``labels``.
    """
    # The unpacking had been commented out, so `predictions` was read before
    # assignment and `labels` was never defined.
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=1)

    return metric.compute(predictions=predictions,
                          references=labels)

In [None]:
# eval
# Evaluation-only configuration: the Trainer below receives no training dataset.
# NOTE(review): newer transformers releases rename `evaluation_strategy` to
# `eval_strategy` — confirm against the installed version.
training_args = TrainingArguments(
    output_dir="../results/promting_T5",
    per_device_eval_batch_size=64,
    evaluation_strategy="epoch"
)

# NOTE(review): the dataset's integer `labels` column will be handed to a seq2seq
# model — confirm Trainer.evaluate() can score it this way, or whether a
# generation-based evaluation is needed instead.
trainer = Trainer(
    model=model,
    args=training_args,
    compute_metrics=compute_metrics,
    eval_dataset=tokenized_dataset
)

In [None]:
# Run evaluation over the Trainer's eval_dataset and keep the returned results.
evaluation_results = trainer.evaluate()

In [None]:
def postprocess_text(predictions, labels):
    """Convert predicted and reference label strings through ``label2id``.

    Both inputs are whitespace-stripped first. Every reference must be a key
    of ``label2id``; a prediction that is not a key becomes the string
    ``'-100'``. ``label2id`` is read from the notebook namespace and is not
    defined in the visible cells.

    Args:
        predictions: Iterable of predicted label strings.
        labels: Iterable of reference label strings.

    Returns:
        A ``(predictions, labels)`` tuple of two new lists.
    """
    cleaned = [text.strip() for text in predictions]
    labels = [label2id[name.strip()] for name in labels]

    # Unknown generations are kept, marked with the '-100' placeholder.
    predictions = [
        label2id[text] if text in label2id else '-100'
        for text in cleaned
    ]
    return predictions, labels

In [None]:
def _few_shot_prompt(neighbour_ids, corpus, labels, id2label):
    """Build one few-shot prompt from the corpus entries at ``neighbour_ids``."""
    text = 'Here are examples of texts and their sentiments'
    for neighbour_id in neighbour_ids:
        shot_text = corpus[neighbour_id]
        # id2label is looked up with the stringified label.
        shot_label = id2label[str(labels[neighbour_id])]
        text = " ".join([text, ". Text: ", shot_text, ". Sentiment: ", shot_label])
    return text


def preprocess_prompt(examples, top_k_indices, corpus, labels, id2label):
    """Attach a few-shot prompt to every example in the batch.

    Only the number of entries in ``examples["text"]`` is used; the texts
    themselves are not inserted into the prompt.

    Args:
        examples: Batch mapping with a "text" entry; it is modified in place.
        top_k_indices: Indexable by example position; each item has a
            'top_index' entry listing positions into ``corpus`` / ``labels``.
        corpus: Indexable collection of demonstration texts.
        labels: Indexable collection of labels aligned with ``corpus``.
        id2label: Mapping from ``str(label)`` to the label's text.

    Returns:
        The same ``examples`` object, with a new "prompt" list added.
    """
    examples["prompt"] = [
        _few_shot_prompt(top_k_indices[position]['top_index'], corpus, labels, id2label)
        for position, _ in enumerate(examples["text"])
    ]
    return examples

In [None]:
# Encode one source sentence and one decoder prefix, both on the model's device.
input_ids = tokenizer(
    "Studies have been shown that owning a dog is good for you", return_tensors="pt"
).input_ids.to(device)  # Batch size 1
decoder_input_ids = tokenizer("Studies show that", return_tensors="pt").input_ids.to(device)  # Batch size 1

# preprocess: Prepend decoder_input_ids with start token which is pad token for T5Model.
# This is not needed for torch's T5ForConditionalGeneration as it does this internally using labels arg.
decoder_input_ids = model._shift_right(decoder_input_ids)

# forward pass
# `model` here is a T5ForConditionalGeneration, whose output object has no
# `last_hidden_state` field (that field belongs to the bare T5Model output).
# Request the hidden states explicitly and take the final decoder entry.
outputs = model(
    input_ids=input_ids,
    decoder_input_ids=decoder_input_ids,
    output_hidden_states=True,
)
last_hidden_states = outputs.decoder_hidden_states[-1]

In [None]:
# Shape of the tensor stored in `last_hidden_states` by the forward-pass cell.
last_hidden_states.shape

## Evaluate on IMDB dataset

In [None]:
# F1 metric object; compute_metrics below calls its compute() method.
# NOTE(review): this repeats the metric load from an earlier cell — confirm both are needed.
metric = evaluate.load("f1")
def compute_metrics(eval_pred):
    """Score one evaluation pass with the notebook-level ``metric``.

    Args:
        eval_pred: A pair ``(predictions, labels)``; the first item is reduced
            with ``np.argmax`` along axis 1 before scoring.

    Returns:
        The result of ``metric.compute`` for the argmax predictions against
        the labels.
    """
    scores, references = eval_pred
    predicted_ids = np.argmax(scores, axis=1)
    return metric.compute(predictions=predicted_ids, references=references)

In [None]:
# eval
# Evaluation settings; the per-device eval batch size is 16 here.
# NOTE(review): newer transformers releases rename `evaluation_strategy` to
# `eval_strategy` — confirm against the installed version.
training_args = TrainingArguments(
    output_dir="../results/promting_T5",
    per_device_eval_batch_size=16,
    evaluation_strategy="epoch"
)

In [None]:
# Evaluation-only Trainer: no training dataset is supplied.
# NOTE(review): the dataset's integer `labels` column will be handed to a seq2seq
# model — confirm Trainer.evaluate() can score it this way, or whether a
# generation-based evaluation is needed instead.
trainer = Trainer(
    model=model,
    args=training_args,
    compute_metrics=compute_metrics,
    eval_dataset=tokenized_dataset
)

In [None]:
# Run evaluation over the Trainer's eval_dataset and keep the returned results.
evaluation_results = trainer.evaluate()