In [1]:
from IPython.display import clear_output

In [2]:
!pip install openprompt
!pip install torch
!pip install jsonlines
clear_output()

In [3]:
from google.colab import drive
drive.mount("/content/drive")

Mounted at /content/drive


In [4]:
import jsonlines

with jsonlines.open("/content/train.jsonl", "r") as fin:
    train_data = list(fin)
with jsonlines.open("/content/val.jsonl", "r") as fin:
    dev_data = list(fin)
with jsonlines.open("/content/test.jsonl", "r") as fin:
    test_data = list(fin)

In [7]:
# creating a special dataset
from openprompt.data_utils import InputExample

def create_dataset_for_p_tuning(train_data, val_data, test_data):
    dataset = {key: [] for key in ['train', 'validation', 'test']}
    for data_example in train_data:
        input_example = InputExample(text_a = data_example['passage'], text_b = data_example['question'], label=int(data_example['label']), guid=data_example['idx'])
        dataset['train'].append(input_example)
    for data_example in val_data:
        input_example = InputExample(text_a = data_example['passage'], text_b = data_example['question'], label=int(data_example['label']), guid=data_example['idx'])
        dataset['validation'].append(input_example)
    for data_example in test_data:
        input_example = InputExample(text_a = data_example['passage'], text_b = data_example['question'], guid=data_example['idx'])
        dataset['test'].append(input_example)

    return dataset

dataset = create_dataset_for_p_tuning(train_data, dev_data, test_data)

In [8]:
# loading pretraied language model
from openprompt.plms import load_plm

plm, tokenizer, model_config, WrapperClass = load_plm("bert", "DeepPavlov/rubert-base-cased")

Downloading (…)lve/main/config.json:   0%|          | 0.00/642 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/714M [00:00<?, ?B/s]

Some weights of the model checkpoint at DeepPavlov/rubert-base-cased were not used when initializing BertForMaskedLM: ['cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/1.65M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/24.0 [00:00<?, ?B/s]

In [9]:
# defining p-tuning template
from openprompt.prompts import PtuningTemplate

p_tuning_template = PtuningTemplate(model=plm, tokenizer=tokenizer, text='{"placeholder": "text_a"} Вопрос: {"placeholder": "text_b"} {"soft"} {"soft"} {"soft"} Ответ: {"mask"} .')

In [10]:
# labels verbalizer – enables us to match 0 class to "no" answer and 1 class to "yes" answer
from openprompt.prompts import ManualVerbalizer

BinaryQAVerbalizer = ManualVerbalizer(tokenizer, num_classes=2, classes=[0, 1], label_words=[['нет'], ['да']])

In [12]:
# train and val dataloaders 
from openprompt import PromptDataLoader

train_dataloader = PromptDataLoader(dataset=dataset["train"], template=p_tuning_template, tokenizer=tokenizer,
                                    tokenizer_wrapper_class=WrapperClass, max_seq_length=512, batch_size=8, shuffle=True, 
                                    teacher_forcing=False, predict_eos_token=False, truncate_method="tail")
validation_dataloader = PromptDataLoader(dataset=dataset["validation"], template=p_tuning_template, tokenizer=tokenizer,
                                         tokenizer_wrapper_class=WrapperClass, max_seq_length=512, batch_size=8, shuffle=False, 
                                         teacher_forcing=False, predict_eos_token=False, truncate_method="tail")
test_dataloader = PromptDataLoader(dataset=dataset["test"], template=p_tuning_template, tokenizer=tokenizer,
                                   tokenizer_wrapper_class=WrapperClass, max_seq_length=512, batch_size=8, shuffle=False, 
                                   teacher_forcing=False, predict_eos_token=False, truncate_method="tail")

tokenizing: 1749it [00:07, 229.98it/s]
tokenizing: 821it [00:02, 321.16it/s]
tokenizing: 805it [00:02, 331.21it/s]


In [13]:
import torch
from openprompt import PromptForClassification
from transformers import  AdamW, get_linear_schedule_with_warmup

use_cuda = True
prompt_model = PromptForClassification(plm=plm, template=p_tuning_template, verbalizer=BinaryQAVerbalizer, freeze_plm=False)
if use_cuda:
    prompt_model = prompt_model.cuda()

loss_func = torch.nn.CrossEntropyLoss()
no_decay = ['bias', 'LayerNorm.weight']

# it's always good practice to set no decay to biase and LayerNorm parameters
optimizer_grouped_parameters = [
    {'params': [p for n, p in prompt_model.plm.named_parameters() if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01},
    {'params': [p for n, p in prompt_model.plm.named_parameters() if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
]
optimizer = AdamW(optimizer_grouped_parameters, lr=1e-5)



In [14]:
import numpy as np
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

cuda = True
def fit(model, train_dataloader, val_dataloader, loss_func, optimizer, PATH):
    best_score = 0.0
    best_score_metrics = {elem:list() for elem in ["Precision", "Recall", "F1"]}
    for epoch in range(5):
        train_loss = train_epoch(model, train_dataloader, loss_func, optimizer)
        acc_score, metrics = evaluate(model, val_dataloader)
        # metrics for each class
        precision, recall, f1 = [round(100*elem, 2) for elem in metrics[0]], [round(100*elem, 2) for elem in metrics[1]], [round(100*elem, 2) for elem in metrics[2]]
        if acc_score > best_score:
            best_score = acc_score
            metrics_dict = {"Precision":precision, "Recall":recall, "F1":f1}
            best_score_metrics.update(metrics_dict)
            # also saving best model and its parameters 
            torch.save(model, f'{PATH}/best-model_{epoch}.pt')
            torch.save(model.state_dict(), f'{PATH}/best-model-parameters_{epoch}.pt')
        print(f"Epoch {epoch+1}: Train loss={train_loss}, Accuracy: {round(100 * acc_score, 2)}")
        for key, value in zip(["Precision", "Recall", "F1"], metrics):
            print("{}: Positive {:.2f}, Negative {:.2f}".format(key, *(list(100 * value))))
        print('\n')
    return best_score, best_score_metrics


def train_epoch(model, train_dataloader, loss_func, optimizer):
    model.train()
    loss_all = []
    for step, inputs in enumerate(train_dataloader):
        if cuda:
            inputs = inputs.cuda()
        logits = model(inputs)
        #_, pred = torch.max(logits, 1)
        labels = inputs['label']
        loss = loss_func(logits, labels)
        loss.backward()
        loss_all.append(loss.item())
        optimizer.step()
        optimizer.zero_grad()
    return np.mean(loss_all)

def evaluate(model, val_dataloader):
    model.eval()
    corr_labels = []
    pred_labels = []
    with torch.no_grad():
        for step, inputs in enumerate(val_dataloader):
            if cuda:
                inputs = inputs.cuda()
            logits = model(inputs)
            labels = inputs['label']
            corr_labels.extend(labels.cpu().tolist())
            pred_labels.extend(torch.argmax(logits, dim=-1).cpu().tolist())
    accuracy = accuracy_score(corr_labels, pred_labels)
    metrics = precision_recall_fscore_support(y_true=corr_labels, y_pred=pred_labels, labels=[1, 0])[:3]
    return accuracy, metrics


In [None]:
if cuda:
    prompt_model = prompt_model.cuda()
save_path = "/content/drive/MyDrive/Diploma 2.0/rubert_deeppavl_p_tuning_full_p_tuning_upd"
score, metrics = fit(prompt_model, train_dataloader, validation_dataloader, loss_func, optimizer, save_path)

Epoch 1: Train loss=0.5679489897177631, Accuracy: 73.69
Precision: Positive 69.07, Negative 81.43
Recall: Positive 86.17, Negative 61.12
F1: Positive 76.67, Negative 69.83


Epoch 2: Train loss=0.33740226114668076, Accuracy: 73.08
Precision: Positive 67.65, Negative 83.57
Recall: Positive 88.83, Negative 57.21
F1: Positive 76.81, Negative 67.92


Epoch 3: Train loss=0.1968079284251405, Accuracy: 73.33
Precision: Positive 68.74, Negative 81.05
Recall: Positive 85.92, Negative 60.64
F1: Positive 76.38, Negative 69.37


Epoch 4: Train loss=0.10254142563764133, Accuracy: 71.86
Precision: Positive 66.31, Negative 83.46
Recall: Positive 89.32, Negative 54.28
F1: Positive 76.11, Negative 65.78


Epoch 5: Train loss=0.0630887678036111, Accuracy: 70.65
Precision: Positive 65.35, Negative 81.82
Recall: Positive 88.35, Negative 52.81
F1: Positive 75.13, Negative 64.19




In [None]:
print(f"Best model accuracy score: {round(100*score, 2)}")
print(f"Precision: Positive {metrics['Precision'][0]} Negative {metrics['Precision'][1]}")
print(f"Recall: Positive {metrics['Recall'][0]} Negative {metrics['Recall'][1]}")
print(f"F1: Positive {metrics['F1'][0]} Negative {metrics['F1'][1]}")

Best model accuracy score: 73.69
Precision: Positive 69.07 Negative 81.43
Recall: Positive 86.17 Negative 61.12
F1: Positive 76.67 Negative 69.83


In [None]:
!pip install torch_model

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting torch_model
  Downloading torch_model-0.0.1-py3-none-any.whl (2.9 kB)
Installing collected packages: torch_model
Successfully installed torch_model-0.0.1


In [16]:
# obtaining predictions for test set 
prompt_model.load_state_dict(torch.load("/content/drive/MyDrive/Diploma 2.0/best-model-parameters_0.pt"))

<All keys matched successfully>

In [17]:
cuda = True

In [18]:
def get_test_predictions(model, dataloader):
    model.eval()
    pred_labels = []
    with torch.no_grad():
        for step, inputs in enumerate(dataloader):
            if cuda:
                inputs = inputs.cuda()
            logits = model(inputs)
            pred_labels.extend(torch.argmax(logits, dim=-1).cpu().tolist())
    return pred_labels


predicted_test_labels = get_test_predictions(prompt_model, test_dataloader)

In [19]:
test_result = []

for i, predicted_label in enumerate(predicted_test_labels):
    result = {}
    if predicted_label == 1:
        result['idx'] = i
        result['label'] = "true"
    else: 
        result['idx'] = i
        result['label'] = "false"
    test_result.append(result)

In [20]:
!pip install jsonlines

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [21]:
import jsonlines

with jsonlines.open('DaNetQA.jsonl', mode='w') as writer:
    writer.write_all(test_result)

In [22]:
with jsonlines.open('/content/drive/MyDrive/Diploma 2.0/test_DaNetQA_p_tune_add_full_p_tuned_final_vers.jsonl', mode='w') as writer:
    writer.write_all(test_result)

In [23]:
from zipfile import ZipFile
 
with ZipFile("superglue_binaryqa_p_tuned.zip", "w") as myzip:
    myzip.write('DaNetQA.jsonl')