# Open Prompt Classifier
Based on this tutorial: https://github.com/thunlp/OpenPrompt/blob/main/tutorial/0_basic.py and tutorial from Natalia Skachkova and Tatiana Anikina

In [1]:
from google.colab import drive
drive.mount("/content/drive/")

Mounted at /content/drive/


In [2]:
cd /content/drive/MyDrive/thesis/prompting/open_prompt_classifier

/content/drive/MyDrive/thesis/prompting/open_prompt_classifier


In [3]:
!pip install openprompt
!pip install torch

Collecting openprompt
  Downloading openprompt-1.0.1-py3-none-any.whl (146 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m146.4/146.4 kB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting transformers>=4.10.0 (from openprompt)
  Downloading transformers-4.34.0-py3-none-any.whl (7.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.7/7.7 MB[0m [31m60.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting sentencepiece==0.1.96 (from openprompt)
  Downloading sentencepiece-0.1.96-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.2/1.2 MB[0m [31m70.8 MB/s[0m eta [36m0:00:00[0m
Collecting tensorboardX (from openprompt)
  Downloading tensorboardX-2.6.2.2-py2.py3-none-any.whl (101 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m101.7/101.7 kB[0m [31m12.3 MB/s[0m eta [36m0:00:00[0m
Collecting yacs (from openprompt)
  Downloading yacs-0.1.8

In [4]:
!pip install import-ipynb
import import_ipynb

Collecting import-ipynb
  Downloading import_ipynb-0.1.4-py3-none-any.whl (4.1 kB)
Collecting jedi>=0.16 (from IPython->import-ipynb)
  Downloading jedi-0.19.1-py2.py3-none-any.whl (1.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m16.9 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: jedi, import-ipynb
Successfully installed import-ipynb-0.1.4 jedi-0.19.1


In [5]:
from openprompt import PromptForClassification
from openprompt.plms import load_plm

from openprompt.prompts import ManualTemplate, ManualVerbalizer, SoftTemplate
from openprompt import PromptDataLoader

from claqua_prompt_dataset import CLAQUAPromptProcessor

import torch

import re
from datetime import datetime



importing Jupyter notebook from claqua_prompt_dataset.ipynb


In [6]:
use_cuda = True
device = "cuda"

In [7]:
!cp /content/drive/MyDrive/thesis/prompting/open_prompt_classifier/read_claqua_corpus.py /content
!cp /content/drive/MyDrive/thesis/prompting/open_prompt_classifier/preprocess_claqua_single_turn.py /content
!cp /content/drive/MyDrive/thesis/prompting/open_prompt_classifier/preprocess_claqua_multi_turn.py /content

In [8]:
from read_claqua_corpus import read_train_dev_test

In [49]:
# Setting up the hyperparameters
num_epochs = 3
template_filename = "claqua_soft_template_5.txt"
corpus_split = "multi-turn"
init_from_vocab = True
freeze_lm = False
num_soft_tokens = 50 #[10, 50, 100]
batch_size = 8 #[8, 16]
learning_rate = 5e-5
model = "bert"
model_name_or_path = "bert-base-cased"
store_dir = "saved_models/"
do_train = True

plm, tokenizer, model_config, WrapperClass = load_plm(model, model_name_or_path)
# Preparing the template and the verbalizer
with open(template_filename, 'r') as f:
  for line in f:
    print(line)
mytemplate = SoftTemplate(model=plm, tokenizer=tokenizer, num_tokens=num_soft_tokens, initialize_from_vocab=init_from_vocab).from_file(template_filename, choice=0)#
myverbalizer = ManualVerbalizer(tokenizer, num_classes=2, label_words=[["no"], ["yes"]])
pmodel_name = "claqua_"+corpus_split+"_"+model+"_"+"softtokens"+str(num_soft_tokens)+"_lr"+str(learning_rate)+"_batch"+str(batch_size)+"_epochs"+str(num_epochs)+"_freeze"+str(freeze_lm)+"__"+template_filename

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForMaskedLM: ['bert.pooler.dense.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


{"placeholder":"text_a"} Does this context question to a knowledge-based question answering system need a clarification request? {"mask"}


In [44]:
pmodel_name

'claqua_multi-turn_bert_softtokens50_lr5e-05_batch8_epochs3_freezeFalse__claqua_soft_template_6.txt'

## Reload dataset

In [36]:
# Load train, dev and test corpus
data_path = "claqua_data"
num_max_toks = 300
TURN_SEP_TOK = ' [TURN_SEP] '
CONTEXT_SEP_TOK = ' [CONTEXT_SEP] '
ENTITY_SEP_TOK = ' [ENTITY_SEP] '
train_corpus, dev_corpus, test_corpus = read_train_dev_test(data_path=data_path, corpus_split=corpus_split, tokenizer=tokenizer, num_max_toks=num_max_toks, turn_sep_tok=TURN_SEP_TOK, context_sep_tok=CONTEXT_SEP_TOK, entity_sep_tok=ENTITY_SEP_TOK)


Token indices sequence length is longer than the specified maximum sequence length for this model (1829 > 512). Running this sequence through the model will result in indexing errors


In [37]:
dataset = {}
dataset['train'] = CLAQUAPromptProcessor(train_corpus.df['label']).get_train_examples(train_corpus.df['full_text_for_encoding'])
dataset['validation'] = CLAQUAPromptProcessor(dev_corpus.df['label']).get_dev_examples(dev_corpus.df['full_text_for_encoding'])
dataset['test'] = CLAQUAPromptProcessor(test_corpus.df['label']).get_test_examples(test_corpus.df['full_text_for_encoding'])

20462
973
828


In [38]:
dataset['train'][1]

{
  "guid": "1",
  "label": 1,
  "meta": {},
  "text_a": "CONTEXT: Is there a sequel to bazil broketail  [TURN_SEP]  A Sword for a Dragon  [TURN_SEP]  How about the style of this creative work? ENTITY 1: Bazil Broketail Bazil Broketail ( 1992 ) is a fantasy novel written by Christopher Rowley. The book is the first in the Dragons of the Argonath series that follows the adventures of a human boy, Relkin, and his dragon, Bazil Broketail as they fight in the Argonath Legion \u2019 s 109th Mar ENTITY 2: A Sword for a Dragon A Sword for a Dragon is a fantasy novel written by Christopher Rowley. The book is the second in the Dragons of the Argonath series that follows the adventures of a human boy, Relkin, and his dragon, Bazil Broketail as they fight in the Argonath Legion \u2019 s 109th Marneri Dragons. As they continue their service to the Legion, Bazil acquires a new sword that is infused with the spirit of Ecator, an ancient black cat from the city of Tummuz Orgmeen. The 109th Marneri D

## Train

In [50]:
# Preparing the training data
#train_dataloader = PromptDataLoader(dataset=dataset["train"][:64], template=mytemplate, tokenizer=tokenizer,
train_dataloader = PromptDataLoader(dataset=dataset["train"], template=mytemplate, tokenizer=tokenizer,
    tokenizer_wrapper_class=WrapperClass, max_seq_length=num_max_toks, decoder_max_length=5,
    batch_size=batch_size,shuffle=True, teacher_forcing=False, predict_eos_token=False,
    truncate_method="head")
# Initializing the prompt model
prompt_model = PromptForClassification(plm=plm, template=mytemplate, verbalizer=myverbalizer, freeze_plm=freeze_lm)
if use_cuda:
    prompt_model = prompt_model.cuda()
# Setting up the loss function and the optimizer
loss_func = torch.nn.CrossEntropyLoss()
no_decay = ['bias', 'LayerNorm.weight']
# A good practice is to set no decay to biase and LayerNorm parameters
optimizer_grouped_parameters = [
    {'params': [p for n, p in prompt_model.named_parameters() if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01},
    {'params': [p for n, p in prompt_model.named_parameters() if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
]
optimizer = torch.optim.AdamW(optimizer_grouped_parameters, lr=learning_rate)
# Preparing the validation data
#validation_dataloader = PromptDataLoader(dataset=dataset["validation"][:16], template=mytemplate, tokenizer=tokenizer,
validation_dataloader = PromptDataLoader(dataset=dataset["validation"], template=mytemplate, tokenizer=tokenizer,
    tokenizer_wrapper_class=WrapperClass, max_seq_length=num_max_toks, decoder_max_length=5,
    batch_size=batch_size,shuffle=False, teacher_forcing=False, predict_eos_token=False,
    truncate_method="head")

tokenizing: 20462it [01:15, 269.93it/s]
tokenizing: 973it [00:03, 253.00it/s]


In [51]:
# Training
now = datetime.now()
print(now)
dev_loss_min = None
if do_train:
    for epoch in range(num_epochs):
        total_loss = 0
        for step, inputs in enumerate(train_dataloader):
            if use_cuda:
                inputs = inputs.cuda()
            logits = prompt_model(inputs)
            labels = inputs['label']
            loss = loss_func(logits, labels)
            loss.backward()
            total_loss += loss.item()
            optimizer.step()
            optimizer.zero_grad()
        # datetime object containing current date and time
        now = datetime.now()
        print("Time {}, Epoch {}, average loss: {}".format(now, epoch, total_loss/(step+1)), flush=True)
        # Evaluation on the development set
        with torch.no_grad():
            cur_loss = 0
            for step, inputs in enumerate(validation_dataloader):
                if use_cuda:
                    inputs = inputs.cuda()
                logits = prompt_model(inputs)
                labels = inputs['label']
                loss = loss_func(logits, labels)
                cur_loss += loss.item()
            cur_loss = cur_loss/(step+1)
            if dev_loss_min is None or cur_loss<dev_loss_min:
                dev_loss_min = cur_loss
                torch.save(prompt_model.state_dict(), store_dir+pmodel_name+".pt")

2023-10-06 09:19:32.169923
Time 2023-10-06 09:27:14.748851, Epoch 0, average loss: 0.1012928966973411
Time 2023-10-06 09:35:05.332903, Epoch 1, average loss: 0.031634247311427444
Time 2023-10-06 09:42:54.901493, Epoch 2, average loss: 0.020425708075144784


In [52]:
# Evaluation on the test set
test_dataloader = PromptDataLoader(dataset=dataset["test"], template=mytemplate, tokenizer=tokenizer,
    tokenizer_wrapper_class=WrapperClass, max_seq_length=num_max_toks, decoder_max_length=3,
    batch_size=batch_size,shuffle=False, teacher_forcing=False, predict_eos_token=False,
    truncate_method="head")

prompt_model.load_state_dict(torch.load(store_dir+pmodel_name+".pt"))
#torch.save(prompt_model.template.soft_embeds.data, 'soft_tensors.pt')
print("Evaluating model:", pmodel_name)

alltexts = []
alltexts = [instance.text_a for instance in dataset["test"]]
allpreds = []
alllabels = []
for step, inputs in enumerate(test_dataloader):
    if use_cuda:
        inputs = inputs.cuda()
    logits = prompt_model(inputs)
    labels = inputs['label']
    alllabels.extend(labels.cpu().tolist())
    allpreds.extend(torch.argmax(logits, dim=-1).cpu().tolist())
acc = sum([int(i==j) for i,j in zip(allpreds, alllabels)])/len(allpreds)
print("Accuracy:", acc)

tokenizing: 828it [00:03, 272.11it/s]


Evaluating model: claqua_multi-turn_bert_softtokens50_lr5e-05_batch8_epochs3_freezeFalse__claqua_soft_template_5.txt
Accuracy: 0.9347826086956522


In [53]:
from sklearn.metrics import classification_report
print(classification_report(y_true=alllabels, y_pred=allpreds,digits=4))

              precision    recall  f1-score   support

           0     0.9803    0.8964    0.9365       444
           1     0.8910    0.9792    0.9330       384

    accuracy                         0.9348       828
   macro avg     0.9356    0.9378    0.9347       828
weighted avg     0.9389    0.9348    0.9349       828



### Print incorrectly classified corpus items

In [54]:
all_labels = ["no", "yes"]
id2label = dict()
for i, label in enumerate(all_labels):
    id2label[i] = label

scores = dict()
for label in all_labels:
    scores[label] = {"tp":0, "fp":0, "fn":0}
match = 0
for i in range(len(allpreds)):
    predicted_label = id2label[allpreds[i]]
    gold_label = id2label[alllabels[i]]
    if predicted_label==gold_label:
        match+=1
        scores[predicted_label]["tp"]+=1
    else:
        print("Predicted:", predicted_label)
        print("Gold:", gold_label)
        print("Text:", alltexts[i], "\n")
        scores[predicted_label]["fp"]+=1
        scores[gold_label]["fn"]+=1

Predicted: no
Gold: yes
Text: CONTEXT: Where is administrative capital for ferreñafe  [TURN_SEP]  Ferreñafe  [TURN_SEP]  In which time zone? ENTITY 1: Ferreñafe  Ferreñafe District is one of six districts of the province Ferreñafe in Peru. ENTITY 2: Ferreñafe  Ferreñafe is a town in Northern Peru, capital of the province Ferreñafe in the region Lambayeque. 

Predicted: yes
Gold: no
Text: CONTEXT: To which automative class does kia optima 2014 correspond?  [TURN_SEP]  2018 hyundai sonata hybrid  [TURN_SEP]  What was the engine used? ENTITY 1: Kia optima 2014  The 2014 Kia Optima Hybrid adds aerodynamic improvements including restyled front and rear fascias. Additional vents are added to the Optima Hybrid’s front bumper to increase airflow and reduce drag. New 16- and 17-inch wheel designs reduce weight and drag while enhancing the design. The Optima Hybrid features a unique grille design, front LED lighting exclusive to the hybrid model, new Hybrid fender badges and an optional white le