[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/tanikina/low-resource-nlp-lab/blob/main/notebooks/OpenPrompt_Tutorial.ipynb)

<img src="images/open-prompt.png" alt="OpenPrompt" width="200"/>

**Documentation:** [https://thunlp.github.io/OpenPrompt/](https://thunlp.github.io/OpenPrompt/)

**Examples & Tutorials:** [https://github.com/thunlp/OpenPrompt/tree/main/tutorial](https://github.com/thunlp/OpenPrompt/tree/main/tutorial)

**Paper:** [OpenPrompt: An Open-source Framework for Prompt-learning (Ding et al., 2022)](https://aclanthology.org/2022.acl-demo.10.pdf)

### OpenPrompt Workflow
<img src="images/open-prompt-workflow.png" alt="OpenPrompt Workflow" width="700"/>

In [None]:
! pip install openprompt
! pip install datasets
! pip install transformers [torch]

In [1]:
from openprompt import PromptForClassification
from openprompt.plms import load_plm

from openprompt.prompts import ManualTemplate, ManualVerbalizer, SoftTemplate
from openprompt import PromptDataLoader

from openprompt.data_utils.utils import InputExample
from openprompt.data_utils.data_processor import DataProcessor

from datasets import Dataset, load_dataset
from typing import List, Dict
import torch
import os



In [2]:
device = "cuda" if torch.cuda.is_available() else "cpu"

### 🗃️ Dataset Preparation

In [3]:
class CustomPromptProcessor(DataProcessor):

    def __init__(self, label_word2idx: Dict[int, str]):
        super().__init__()
        self.label_word2idx = label_word2idx
        self.idx2label_word = {v: k for k, v in self.label_word2idx.items()}

    def get_examples(self, dataset: Dataset) -> List[InputExample]:        
        examples = []
        full_src_lst = []
        full_tgt_lst = []
        for i in range(len(dataset)):
            full_src_lst.append(dataset["speaker"][i] + " - " + dataset["text"][i])
            label_idx = dataset["label"][i]
            full_tgt_lst.append(self.idx2label_word[label_idx])
        for i, (src, tgt, tag) in enumerate(zip(full_src_lst, full_tgt_lst, dataset["label"])):
            example = InputExample(guid=str(i), text_a=src, tgt_text=tgt, label=tag)
            examples.append(example)
        return examples

In [4]:
# Preparing the data
orig_data = load_dataset("DFKI/radr_intents")
train_task_dataset = orig_data["train"] # train_task_dataset = Dataset.from_csv("radr_intents/train.csv")
dev_task_dataset = orig_data["validation"] 
test_task_dataset = orig_data["test"] 

# Mapping between the labels and their "verbalization" (e.g., for label 1 "Einsatzbefehl" means "order")
label_word2idx = {"Absage":0, "Einsatzbefehl":1, "Informieren":2, "Nachfragen":3, "Anruf":4, "Antwort":5, "Sonstiges":6, "Zusage":7}
dp = CustomPromptProcessor(label_word2idx)

dataset = dict()
dataset["train"] = dp.get_examples(train_task_dataset)
dataset["validation"] = dp.get_examples(dev_task_dataset)
dataset["test"] = dp.get_examples(test_task_dataset)
print(dataset["train"][24])

{
  "guid": "24",
  "label": 3,
  "meta": {},
  "text_a": "UGV 1 - Frage: der Einsatzbefehl gilt auch f\u00fcr UGV 1?",
  "text_b": "",
  "tgt_text": "Nachfragen"
}



### ⚙️ Model Preparation

In [5]:
from transformers import MT5ForConditionalGeneration, MT5Tokenizer, MT5Config
from openprompt import plms
from openprompt.plms import * 

In [6]:
# Setting up the hyperparameters
num_epochs = 5
pmodel_name = "tradrz-"+str(num_epochs)+"-epochs"
template_filename = "tradrz_soft_template_simple.txt"
init_from_vocab = True
num_soft_tokens = 10
batch_size = 8
model = "t5" # "mt5" or "t5"
model_name_or_path = "t5-base" # "google/mt5-base" or "t5-base"
max_seq_len = 32

if model == "mt5":
    plms._MODEL_CLASSES["mt5"]= ModelClass(**{"config": MT5Config, "tokenizer": MT5Tokenizer, \
                                              "model": MT5ForConditionalGeneration, "wrapper": T5TokenizerWrapper})

store_dir = "saved_models_openprompt/"
if not os.path.exists(store_dir):
    os.makedirs(store_dir)

plm, tokenizer, model_config, WrapperClass = load_plm(model, model_name_or_path)

# We need to add special tokens for mT5 since it does not have them by default
if model == "mt5":
    tokenizer.add_tokens(["<extra_id_0>"])
    plm.resize_token_embeddings(len(tokenizer))
    tokenizer.additional_special_tokens = ["<extra_id_0>"]
    tokenizer.additional_special_token_ids = [len(tokenizer)]
  
# Preparing the template and the verbalizer
# mytemplate = SoftTemplate(model=plm, tokenizer=tokenizer, num_tokens=num_soft_tokens, \
#                          initialize_from_vocab=init_from_vocab).from_file(template_filename, choice=0)
mytemplate = SoftTemplate(model=plm, tokenizer=tokenizer, num_tokens=num_soft_tokens, \
                          initialize_from_vocab=init_from_vocab, text="{'placeholder':'text_a'} {'mask'}")
myverbalizer = ManualVerbalizer(tokenizer, num_classes=8, \
                                label_words=[["Absage"], ["Einsatzbefehl"], ["Informieren"], ["Nachfragen"], \
                                             ["Anruf"], ["Antwort"], ["Sonstiges"], ["Zusage"]])
# Preparing the training data
train_dataloader = PromptDataLoader(dataset=dataset["train"], template=mytemplate, tokenizer=tokenizer, \
                                    tokenizer_wrapper_class=WrapperClass, max_seq_length=max_seq_len, \
                                    decoder_max_length=5, batch_size=batch_size, shuffle=True, \
                                    teacher_forcing=False, predict_eos_token=False, truncate_method="head")

# Initializing the prompt model
prompt_model = PromptForClassification(plm=plm, template=mytemplate, verbalizer=myverbalizer, freeze_plm=False)
prompt_model = prompt_model.to(device)

# Setting up the loss function and the optimizer
loss_func = torch.nn.CrossEntropyLoss()
no_decay = ['bias', 'LayerNorm.weight']
optimizer_grouped_parameters = [
    {'params': [p for n, p in prompt_model.named_parameters() if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01},
    {'params': [p for n, p in prompt_model.named_parameters() if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
]
optimizer = torch.optim.AdamW(optimizer_grouped_parameters, lr=5e-5)

# Preparing the vaildation data
validation_dataloader = PromptDataLoader(dataset=dataset["validation"], template=mytemplate, tokenizer=tokenizer, \
                                         tokenizer_wrapper_class=WrapperClass, max_seq_length=max_seq_len, decoder_max_length=5, \
                                         batch_size=batch_size,shuffle=False, teacher_forcing=False, predict_eos_token=False, \
                                         truncate_method="head")


For now, this behavior is kept to avoid breaking backwards compatibility when padding/encoding with `truncation is True`.
- Be aware that you SHOULD NOT rely on t5-base automatically truncating your input to 512 when padding/encoding.
- If you want to encode/pad to sequences longer than 512 you can either instantiate this tokenizer with `model_max_length` or pass `max_length` when encoding/padding.
You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thouroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
tokenizing: 2610i

### 🚀 Training

In [7]:
# Training
dev_loss_min = None
for epoch in range(num_epochs):
    total_loss = 0
    for step, inputs in enumerate(train_dataloader):
        inputs = inputs.to(device)
        logits = prompt_model(inputs)
        labels = inputs['label']
        loss = loss_func(logits, labels)
        loss.backward()
        total_loss += loss.item()
        optimizer.step()
        optimizer.zero_grad()
    print("Epoch {}, average training loss: {}".format(epoch, total_loss/(step+1)), flush=True)
    
    # Evaluation on the validation set
    with torch.no_grad():
        cur_loss = 0
        for step, inputs in enumerate(validation_dataloader):
            inputs = inputs.to(device)
            logits = prompt_model(inputs)
            labels = inputs['label']
            loss = loss_func(logits, labels)
            cur_loss += loss.item()
        cur_loss = cur_loss/(step+1)
        if dev_loss_min is None or cur_loss<dev_loss_min:
            dev_loss_min = cur_loss
            torch.save(prompt_model.state_dict(), store_dir+pmodel_name+".pt")
    print("Epoch {}, average validation loss: {}".format(epoch, total_loss/(step+1)), flush=True)

Epoch 0, average training loss: 0.8241562300592388
Epoch 0, average validation loss: 6.9102330058813095
Epoch 1, average training loss: 0.353786771567339
Epoch 1, average validation loss: 2.9663660077569194
Epoch 2, average training loss: 0.19130322488810902
Epoch 2, average validation loss: 1.6040039625233755
Epoch 3, average training loss: 0.1229876136365959
Epoch 3, average validation loss: 1.031203837414535
Epoch 4, average training loss: 0.09304335780957264
Epoch 4, average validation loss: 0.7801327693264167


### ✅ Evaluation

In [9]:
# Evaluation on the test set
test_dataloader = PromptDataLoader(dataset=dataset["test"], template=mytemplate, tokenizer=tokenizer, \
                                   tokenizer_wrapper_class=WrapperClass, max_seq_length=max_seq_len, \
                                   decoder_max_length=3, batch_size=batch_size,shuffle=False, \
                                   teacher_forcing=False, predict_eos_token=False, truncate_method="head")

prompt_model.load_state_dict(torch.load(store_dir+pmodel_name+".pt"))
torch.save(prompt_model.template.soft_embeds.data, 'soft_tensors.pt')

print("Evaluating model:", pmodel_name)

alltexts = []
alltexts = [instance.text_a for instance in dataset["test"]]
allpreds = []
alllabels = []
for step, inputs in enumerate(test_dataloader):
    inputs = inputs.to(device)
    logits = prompt_model(inputs)
    labels = inputs['label']
    alllabels.extend(labels.cpu().tolist())
    allpreds.extend(torch.argmax(logits, dim=-1).cpu().tolist())
acc = sum([int(i==j) for i,j in zip(allpreds, alllabels)])/len(allpreds)
print("Accuracy:", acc)

all_labels = ["Absage", "Einsatzbefehl", "Information_geben", "Information_nachfragen", "Kontakt_Anfrage", "Kontakt_Bestaetigung", "Sonstiges", "Zusage"]
id2label = dict()
for i, label in enumerate(all_labels):
    id2label[i] = label

scores = dict()
for label in all_labels:
    scores[label] = {"tp":0, "fp":0, "fn":0}

match = 0
for i in range(len(allpreds)):
    predicted_label = id2label[allpreds[i]]
    gold_label = id2label[alllabels[i]]
    if predicted_label==gold_label:
        match+=1
        scores[predicted_label]["tp"]+=1
    else:
        print("Predicted:", predicted_label)
        print("Gold:", gold_label)
        print("Text:", alltexts[i], "\n")
        scores[predicted_label]["fp"]+=1
        scores[gold_label]["fn"]+=1

print("Accuracy:", round(match/len(allpreds),3), "matched:", match, "total:", len(allpreds))
print("F1 scores:")

micro_prec = 0
micro_rec = 0
micro_f1 = 0

f1scores = 0
all_tp = 0
all_fp = 0
all_fn = 0

# Computing F1 scores (per label)
for label in all_labels:
    tp = scores[label]["tp"]
    fp = scores[label]["fp"]
    fn = scores[label]["fn"]
    all_tp+=tp
    all_fp+=fp
    all_fn+=fn
    if (tp+fp)>0:
        prec = tp/(tp+fp)
    else:
        prec = 0
    if (tp+fn)>0:
        rec = tp/(tp+fn)
    else:
        rec = 0
    if (prec+rec)>0:
        f1score = 2*prec*rec/(prec+rec)
    else:
        f1score = 0
    f1scores+=f1score
    print(label, "F1:", round(f1score,3))
if (all_tp+all_fp)>0:
    micro_prec = all_tp/(all_tp+all_fp)
if (all_tp+all_fn)>0:
    micro_rec = all_tp/(all_tp+all_fn)
if (micro_prec+micro_rec)>0:
    micro_f1 = 2*micro_prec*micro_rec/(micro_prec+micro_rec)

# Computing macro F1 scores
print("Macro F1:", round(f1scores/len(all_labels),3))
print("Micro F1:", round(micro_f1,3))

tokenizing: 605it [00:00, 2536.46it/s]


Evaluating model: tradrz-5-epochs
Accuracy: 0.715702479338843
Predicted: Information_geben
Gold: Absage
Text: UGV 1 - Keine Thermik. 

Predicted: Information_geben
Gold: Kontakt_Anfrage
Text: UAV - Markus für Dirk. 

Predicted: Information_geben
Gold: Kontakt_Bestaetigung
Text: TL - Dirk, kommen. 

Predicted: Information_geben
Gold: Zusage
Text: TL - Das ist gut. 

Predicted: Information_geben
Gold: Kontakt_Anfrage
Text: UAV - Markus für Dirk. 

Predicted: Information_geben
Gold: Kontakt_Bestaetigung
Text: TL - Dirk, kommen. 

Predicted: Zusage
Gold: Information_geben
Text: UGV 1 - Ja, verstanden, zwei bis drei Vermisste. 

Predicted: Information_geben
Gold: Absage
Text: UGV 1 - Nein. 

Predicted: Zusage
Gold: Information_geben
Text: TL - Ja. 

Predicted: Information_geben
Gold: Zusage
Text: UGV 1 - Das ist richtig, ich habe eine Person gefunden. 

Predicted: Zusage
Gold: Information_geben
Text: TL - Das ist verstanden. 

Predicted: Information_geben
Gold: Kontakt_Anfrage
Text: UAV - M

T5 Results:
```
Accuracy: 0.716 matched: 433 total: 605
F1 scores:
Absage F1: 0.545
Einsatzbefehl F1: 0.612
Information_geben F1: 0.721
Information_nachfragen F1: 0.901
Kontakt_Anfrage F1: 0.824
Kontakt_Bestaetigung F1: 0.827
Sonstiges F1: 0.267
Zusage F1: 0.319
Macro F1: 0.627
Micro F1: 0.716
```

mT5 Results:
```
Accuracy: 0.615 matched: 372 total: 605
F1 scores:
Absage F1: 0.4
Einsatzbefehl F1: 0.296
Information_geben F1: 0.705
Information_nachfragen F1: 0.633
Kontakt_Anfrage F1: 0.717
Kontakt_Bestaetigung F1: 0.805
Sonstiges F1: 0.174
Zusage F1: 0.188
Macro F1: 0.49
Micro F1: 0.615
```