# Load data

In [None]:
from openprompt.data_utils import InputExample
from openprompt import PromptForClassification
from openprompt import PromptDataLoader
from openprompt.prompts import SoftTemplate
from openprompt.plms import load_plm
from openprompt.prompts import ManualVerbalizer
import pandas
import warnings
import torch
from transformers import (
    AdamW,
    get_linear_schedule_with_warmup
)

warnings.filterwarnings("ignore")


def _load_examples(tsv_path):
    """Read a tab-separated file and wrap each row in an ``InputExample``.

    The file must provide ``sentence`` and ``label`` columns.

    Args:
        tsv_path: Path of the TSV file to read.

    Returns:
        A ``(frame, examples)`` tuple: the parsed DataFrame and a list with
        one ``InputExample`` per row, whose ``guid`` is the row position.
    """
    frame = pandas.read_csv(tsv_path, sep='\t')
    # Walk both columns in lockstep instead of indexing each Series per row.
    examples = [
        InputExample(guid=position, text_a=sentence, label=label)
        for position, (sentence, label) in enumerate(
            zip(frame['sentence'], frame['label'])
        )
    ]
    return frame, examples


# Load training data
train_tsvreader, train_data = _load_examples("train.tsv")

# Load testing data (the DataFrame keeps its existing ``valid_tsvreader`` name)
valid_tsvreader, test_data = _load_examples("test.tsv")

# Define model and template

In [None]:
# Pre-trained BERT backbone together with the tokenizer, config and
# tokenizer-wrapper class returned by load_plm.
plm, tokenizer, model_config, WrapperClass = load_plm('bert', 'bert-base-uncased')

# Prompt text: the input sentence followed by a masked slot to be filled in.
template = '{"placeholder":"text_a"} It was {"mask"}'
soft_tokens = 8
mytemplate = SoftTemplate(
    model=plm,
    text=template,
    tokenizer=tokenizer,
    num_tokens=soft_tokens,
    initialize_from_vocab=True,
)

# Sentiment analysis has two classes: "0" (negative) and "1" (positive).
# Each class is represented by a handful of label words.
label_words = {
    "0": ["bad","terrible","disgusting","horrible"],
    "1": ["good", "wonderful", "great","excellent"],
}
classes = list(label_words)
promptVerbalizer = ManualVerbalizer(
    classes=classes,
    label_words=label_words,
    tokenizer=tokenizer,
)

# freeze_plm=False is passed through unchanged.
prompt_model = PromptForClassification(
    plm=plm,
    template=mytemplate,
    verbalizer=promptVerbalizer,
    freeze_plm=False,
)

# Define DataLoader

In [None]:
# Settings shared by the training and validation loaders; only the dataset
# and the shuffle flag differ between the two.
_loader_options = dict(
    template=mytemplate,
    tokenizer=tokenizer,
    tokenizer_wrapper_class=WrapperClass,
    max_seq_length=256,
    decoder_max_length=3,
    batch_size=4,
    teacher_forcing=False,
    predict_eos_token=False,
    truncate_method="head",
)

# Training batches are shuffled.
train_dataloader = PromptDataLoader(dataset=train_data, shuffle=True, **_loader_options)

# Validation batches keep the dataset order.
validation_dataloader = PromptDataLoader(dataset=test_data, shuffle=False, **_loader_options)

# Train and inference

In [None]:
# Every parameter that requires gradients is optimised by one AdamW instance.
optimizer_grouped_parameters = [p for p in prompt_model.parameters() if p.requires_grad]

optimizer = AdamW(optimizer_grouped_parameters, lr= 5e-5, eps=1e-8)

total_epochs = 5

# The schedule is expressed in optimizer steps (batches), not epochs:
# 200 warmup steps, then linear decay over all remaining batches.
scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=200, num_training_steps=len(train_dataloader)*total_epochs)

loss_func = torch.nn.CrossEntropyLoss()

for epoch in range(total_epochs):
    tot_loss = 0
    prompt_model.train()
    for step, batch in enumerate(train_dataloader):
        logits = prompt_model(batch)
        labels = batch['label']
        loss = loss_func(logits, labels)
        loss.backward()
        tot_loss += loss.item()
        optimizer.step()
        # Advance the schedule once per batch. Stepping once per epoch would
        # leave the learning rate at (almost) zero for the entire run, since
        # the warmup alone spans 200 scheduler steps.
        scheduler.step()
        optimizer.zero_grad()
        if step %100 ==1:
            print("Epoch {}, average loss: {}".format(epoch, tot_loss/(step+1)), flush=True)

In [None]:
# Leave the training mode set by the training loop before running inference.
prompt_model.eval()

allpreds = []
alllabels = []
alllogits = []
# No gradients are needed for evaluation.
with torch.no_grad():
    for inputs in validation_dataloader:
        logits = prompt_model(inputs)
        labels = inputs['label']
        alllabels.extend(labels.cpu().tolist())
        allpreds.extend(torch.argmax(logits, dim=-1).cpu().tolist())
        # Keep the raw logits: the loss must be computed from class scores,
        # not from the argmax class indices.
        alllogits.append(logits.cpu())

acc = sum(int(i == j) for i, j in zip(allpreds, alllabels)) / len(allpreds)
# Cross-entropy averaged over every evaluated example.
loss = loss_func(torch.cat(alllogits), torch.tensor(alllabels))
print("Accuracy: {}, Loss: {}".format(acc, loss))
