In [90]:
from datasets import load_dataset
import loader
device = 'cuda:1'
data_root_dir = 'dataset/FSS/FewShotSST/'
split = {'train': 'train_128.tsv', 'validation': 'dev.tsv', 'test': 'test.tsv'}
raw_dataset = loader.generate_dialogue(data_root_dir, split)
raw_dataset['train'][0]

{'premise': 'A seriocomic debut of extravagant promise by Georgian-Israeli director Dover Kosashvili .',
 'hypothesis': 'it was positive',
 'idx': 0,
 'label': 1}

In [91]:
from openprompt.data_utils import InputExample

In [92]:
dataset = {}
for split in ['train', 'validation', 'test']:
    dataset[split] = []
    for data in raw_dataset[split]:
        input_example = InputExample(text_a = data['premise'], text_b = data['hypothesis'], label=int(data['label']), guid=data['idx'])
        dataset[split].append(input_example)
print(dataset['train'][0])

{
  "guid": 0,
  "label": 1,
  "meta": {},
  "text_a": "A seriocomic debut of extravagant promise by Georgian-Israeli director Dover Kosashvili .",
  "text_b": "it was positive",
  "tgt_text": null
}



In [93]:
from openprompt.plms import load_plm

In [116]:
plm, tokenizer, model_config, WrapperClass = load_plm("bert", "bert-base-cased")

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForMaskedLM: ['cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [117]:
from openprompt.prompts import MixedTemplate

In [118]:
mytemplate1 = MixedTemplate(model=plm, tokenizer=tokenizer, text='{"placeholder":"text_a"} {"soft": "Question:"} {"placeholder":"text_b"}? Is it correct? {"mask"}.')
mytemplate = MixedTemplate(model=plm, tokenizer=tokenizer, text='{"placeholder":"text_a"} {"soft"} {"soft"} {"soft"} {"placeholder":"text_b"} {"soft"} {"mask"}.')
choose_template = mytemplate

In [119]:
wrapped_example = mytemplate.wrap_one_example(dataset['train'][0]) 
print(wrapped_example)

[[{'text': 'A seriocomic debut of extravagant promise by Georgian-Israeli director Dover Kosashvili .', 'soft_token_ids': 0, 'loss_ids': 0, 'shortenable_ids': 1}, {'text': '', 'soft_token_ids': 1, 'loss_ids': 0, 'shortenable_ids': 0}, {'text': '', 'soft_token_ids': 2, 'loss_ids': 0, 'shortenable_ids': 0}, {'text': '', 'soft_token_ids': 3, 'loss_ids': 0, 'shortenable_ids': 0}, {'text': ' it was positive', 'soft_token_ids': 0, 'loss_ids': 0, 'shortenable_ids': 1}, {'text': '', 'soft_token_ids': 4, 'loss_ids': 0, 'shortenable_ids': 0}, {'text': '<mask>', 'soft_token_ids': 0, 'loss_ids': 1, 'shortenable_ids': 0}, {'text': '.', 'soft_token_ids': 0, 'loss_ids': 0, 'shortenable_ids': 0}], {'guid': 0, 'label': 1}]


In [120]:
wrapped_tokenizer = WrapperClass(max_seq_length=128, decoder_max_length=3, tokenizer=tokenizer,truncate_method="head")

In [121]:
from openprompt import PromptDataLoader

train_dataloader = PromptDataLoader(dataset=dataset["train"], template=choose_template, tokenizer=tokenizer, 
    tokenizer_wrapper_class=WrapperClass, max_seq_length=256, decoder_max_length=3, 
    batch_size=4,shuffle=True, teacher_forcing=False, predict_eos_token=False,
    truncate_method="head")

tokenizing: 128it [00:00, 543.20it/s]


In [122]:
from openprompt.prompts import ManualVerbalizer
import torch

In [123]:
from openprompt.prompts import SoftVerbalizer
myverbalizer = ManualVerbalizer(tokenizer, num_classes=2, label_words=[["yes"], ["no"], ["maybe"]])
promptVerbalizer2 = SoftVerbalizer(tokenizer=tokenizer, plm=plm, num_classes=2)
chooseVerbalizer = promptVerbalizer2

In [124]:
print(myverbalizer.label_words_ids)
logits = torch.randn(2,len(tokenizer)) # creating a pseudo output from the plm
myverbalizer.process_logits(logits)

Parameter containing:
tensor([[[4208]],

        [[1185]],

        [[2654]]])


tensor([[-0.9880, -0.8648, -1.5773],
        [-2.0271, -1.0226, -0.6761]])

In [125]:
from openprompt import PromptForClassification

use_cuda = True
prompt_model = PromptForClassification(plm=plm,template=choose_template, verbalizer=myverbalizer, freeze_plm=False)
if use_cuda:
    prompt_model=  prompt_model.to(device)

In [126]:
from transformers import  AdamW, get_linear_schedule_with_warmup
from torch.optim import SGD
loss_func = torch.nn.CrossEntropyLoss()

no_decay = ['bias', 'LayerNorm.weight']

# it's always good practice to set no decay to biase and LayerNorm parameters
optimizer_grouped_parameters1 = [
    # {'params': [p for n, p in prompt_model.plm.named_parameters() if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01},
    # {'params': [p for n, p in prompt_model.plm.named_parameters() if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
    {'params': [p for n, p in prompt_model.plm.named_parameters() if not any(nd in n for nd in no_decay)], 'weight_decay': 0.5},
    {'params': [p for n, p in prompt_model.plm.named_parameters() if any(nd in n for nd in no_decay)], 'weight_decay': 0.5}
]

# Using different optimizer for prompt parameters and model parameters
optimizer_grouped_parameters2 = [
    {'params': [p for n,p in prompt_model.template.named_parameters() if "raw_embedding" not in n]}
]

# optimizer1 = AdamW(optimizer_grouped_parameters1, lr=1e-4)
# optimizer2 = AdamW(optimizer_grouped_parameters2, lr=1e-3)
# optimizer1 = AdamW(optimizer_grouped_parameters1, lr=2.75e-4)
# optimizer2 = AdamW(optimizer_grouped_parameters2, lr=1e-3)
optimizer1 = SGD(prompt_model.parameters(), lr=8e-6)

epochs = 10
for epoch in range(1, epochs + 1):
    tot_loss = 0 
    batch_cnt = 0
    for step, inputs in enumerate(train_dataloader):
        if use_cuda:
            inputs = inputs.to(device)
        logits = prompt_model(inputs)
        labels = inputs['label']
        loss = loss_func(logits, labels)
        loss.backward()
        tot_loss += loss.item()
        optimizer1.step()
        optimizer1.zero_grad()
        # optimizer2.step()
        # optimizer2.zero_grad()
        batch_cnt += 1
    tot_loss /= batch_cnt
    print('epoch: %d\tTraining Loss: %5lf' % (epoch, tot_loss))

validation_dataloader = PromptDataLoader(dataset=dataset["validation"], template=choose_template, tokenizer=tokenizer, 
    tokenizer_wrapper_class=WrapperClass, max_seq_length=256, decoder_max_length=3, 
    batch_size=4,shuffle=False, teacher_forcing=False, predict_eos_token=False,
    truncate_method="head")


allpreds = []
alllabels = []
for step, inputs in enumerate(validation_dataloader):
    if use_cuda:
        inputs = inputs.to(device)
    logits = prompt_model(inputs)
    labels = inputs['label']
    alllabels.extend(labels.cpu().tolist())
    allpreds.extend(torch.argmax(logits, dim=-1).cpu().tolist())

acc = sum([int(i==j) for i,j in zip(allpreds, alllabels)])/len(allpreds)
print('Validation Accuracy: %.3lf' % acc)

epoch: 1	Training Loss: 1.472958
epoch: 2	Training Loss: 1.204011
epoch: 3	Training Loss: 1.053555
epoch: 4	Training Loss: 0.942104
epoch: 5	Training Loss: 0.852252
epoch: 6	Training Loss: 0.776923
epoch: 7	Training Loss: 0.705240
epoch: 8	Training Loss: 0.641723
epoch: 9	Training Loss: 0.584747
epoch: 10	Training Loss: 0.536566


tokenizing: 1101it [00:01, 576.48it/s]


Validation Accuracy: 0.773


In [433]:
print(allpreds)

[0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 

In [417]:
testdata_path = 'dataset/FSS/FewShotSST/test.tsv'
test_data = loader.read_unlabel_data(testdata_path)
testset = [
    InputExample(text_a=sentence) for _, sentence in test_data
]
test_dataloader = PromptDataLoader(
    dataset = testset,
    tokenizer = tokenizer,
    template = choose_template,
    tokenizer_wrapper_class=WrapperClass,
)


test_preds = []
for step, inputs in enumerate(test_dataloader):
    if use_cuda:
        inputs = inputs.to(device)
    logits = prompt_model(inputs)
    test_preds.extend(torch.argmax(logits, dim=-1).cpu().tolist())

tokenizing: 2210it [00:02, 778.38it/s] 


In [19]:
print(test_preds)

[1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 

In [57]:
with open('128.tsv', 'w') as f:
    f.write('index\tprediction\n')
    for i in range(len(test_preds)):
        f.write('%d\t%d\n' % (i, test_preds[i]))
    f.close()