### Read training, dev and unlabeled test data

The following provides a starting code (Python 3) of how to read the labeled training and dev sentence pairs, and unlabeled test sentence pairs, into lists.

In [1]:
!pip install transformers
!pip install -q SentencePiece



In [2]:
import csv
import transformers
from   transformers import AutoTokenizer, AutoModelForSequenceClassification, EarlyStoppingCallback, TrainingArguments, Trainer
from   sklearn.metrics import accuracy_score
import torch
import csv
import os
import numpy as np

In [3]:
train, dev, test = [], [], []

In [4]:
with open('./data/pnli_train.csv', encoding='utf-8') as fp:
    csvreader = csv.reader(fp)
    for x in csvreader:
        # x[2] will be the label (0 or 1). x[0] and x[1] will be the sentence pairs.
        train.append(x)
print (len(train))
print (train[:3])

5983
[['Sometimes do exercise.', 'A person typically desire healthy life.', '1'], ['Who eats junk foods.', 'A person typically desire healthy life.', '0'], ['A person is sick.', 'A person typically desire healthy life.', '1']]


In [5]:
with open('./data/pnli_dev.csv', encoding='utf-8') as fp:
    csvreader = csv.reader(fp)
    for x in csvreader:
        # x[2] will be the label (0 or 1). x[0] and x[1] will be the sentence pairs.
        dev.append(x)
print (len(dev))
print (dev[:3])

1055
[['A person is looking for accuracy.', 'A person typically desires accurate results.', '1'], ['A person does not care for accuracy.', 'A person typically desires accurate results.', '0'], ['The person double checks their data.', 'A person typically desires accurate results.', '1']]


In [6]:
with open('./data/pnli_test_unlabeled.csv', encoding='utf-8') as fp:
    csvreader = csv.reader(fp)
    for x in csvreader:
        # x[0] and x[1] will be the sentence pairs.
        test.append(x)
print (len(test))
print (test[:3])

4850
[['The people want to have a romantic and pleasant feel.', 'People typically does desire to smell violets.'], ['The contract is to buy products from you.', 'Getting contract typically cause to make money or spend money.'], ['Train station is closed.', 'Line can typically be used to move train along tracks.']]


### Main Code Body

You may choose to experiment with different methods using your program. However, you need to embed the training and inference processes at here. We will use your prediction on the unlabeled test data to grade, while checking this part to understand how your method has produced the predictions.

In [7]:
class Data_Set_Class(torch.utils.data.Dataset):
    def __init__(self, tokenized_data, lbl=None):
        self.lbl            = lbl
        self.tokenized_data = tokenized_data

    def __getitem__(self, index):
        item_dict = dict()
        for k, v in self.tokenized_data.items():
            item_dict[k] = torch.tensor(v[index])
        #item = {key: torch.tensor(val[IndexError]) for key, val in self.encodings.items()}
        if self.lbl:
            item_dict["labels"] = torch.tensor(self.lbl[index])
        return item_dict

    def __len__(self):
        tokenized_input_data = self.tokenized_data["input_ids"]
        tokenized_data_len   = len(tokenized_input_data)
        return tokenized_data_len

In [9]:
pre_trained_transformer = 'cross-encoder/nli-deberta-v3-base'
model                   = AutoModelForSequenceClassification.from_pretrained(pre_trained_transformer)
tokenizer               = AutoTokenizer.from_pretrained(pre_trained_transformer)

Downloading:   0%|          | 0.00/1.03k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/704M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/417 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/2.35M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/18.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/156 [00:00<?, ?B/s]

In [10]:
## dataset in tensor form creation

def generate_data(data):
    data_len = len(data)
    preconditions_list, statements_list, labels_list = list(), list(), list()
    i = 0
    while i < data_len:
      precondition, statement, label = data[i][0], data[i][1], int(data[i][2])
      preconditions_list.append(precondition)
      statements_list.append(statement)
      labels_list.append(label)
      i += 1
    dataset           = tokenizer(preconditions_list, statements_list, return_tensors='pt', padding=True, truncation=True)
    dataset['labels'] = torch.LongTensor([labels_list]).T.reshape(len(preconditions_list))
    return dataset


In [11]:
training_dataset = generate_data(train)
dev_dataset      = generate_data(dev)

final_train_dataset = Data_Set_Class(training_dataset)
final_dev_dataset   = Data_Set_Class(dev_dataset)

In [12]:
# create arguments

output_dir = 'output'
eval_strat = 'steps'
steps      = 500
batch_size = 64
epochs     = 5

def create_arguments():
    return TrainingArguments(
    output_dir=output_dir,
    evaluation_strategy=eval_strat,
    eval_steps=steps,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    num_train_epochs=epochs, 
    seed=0,
    load_best_model_at_end=True,
)

trainer = Trainer(
    model=model,
    args=create_arguments(),
    train_dataset=final_train_dataset,
    eval_dataset=final_dev_dataset,
    callbacks=[EarlyStoppingCallback(early_stopping_patience= 3)],
)

# Train the model!
trainer.train()

***** Running training *****
  Num examples = 5983
  Num Epochs = 5
  Instantaneous batch size per device = 64
  Total train batch size (w. parallel, distributed & accumulation) = 64
  Gradient Accumulation steps = 1
  Total optimization steps = 470
  if __name__ == '__main__':


Step,Training Loss,Validation Loss




Training completed. Do not forget to share your model on huggingface.co/models =)




TrainOutput(global_step=470, training_loss=0.1443793276523022, metrics={'train_runtime': 266.5785, 'train_samples_per_second': 112.218, 'train_steps_per_second': 1.763, 'total_flos': 538068880235250.0, 'train_loss': 0.1443793276523022, 'epoch': 5.0})

In [13]:
## Predict on dev_data

predictions_dev_data = np.argmax(trainer.predict(final_dev_dataset)[0], axis = 1)

ground_truth_dev_data = [int(dev[i][2]) for i in range(len(dev))]

accuracy_score(predictions_dev_data, ground_truth_dev_data)


***** Running Prediction *****
  Num examples = 1055
  Batch size = 64
  if __name__ == '__main__':


0.9137440758293839

In [14]:
# Eventually, results need to be a list of 2028 0 or 1's

def generate_test_data(data):
    data_len = len(data)
    preconditions_list, statements_list = list(), list()
    i = 0
    while i < data_len:
      precondition, statement= data[i][0], data[i][1]
      preconditions_list.append(precondition)
      statements_list.append(statement)
      i += 1
    dataset           = tokenizer(preconditions_list, statements_list, return_tensors='pt', padding=True, truncation=True)
    return dataset

test_dataset          = generate_test_data(test)
final_test_dataset    = Data_Set_Class(test_dataset)
predictions_test_data = np.argmax(trainer.predict(final_test_dataset)[0], axis = 1)


***** Running Prediction *****
  Num examples = 4850
  Batch size = 64
  if __name__ == '__main__':


In [15]:

results = predictions_test_data.tolist()
len(results)

4850

### Output Prediction Result File

You will need to submit a prediction result file. It should have 2028 lines, every line should be either 0 or 1, which is your model's prediction on the respective test set instance.

In [16]:
# suppose you had your model's predictions on the 2028 test cases read from test_enc_unlabeled.tsv, and 
#those results are in the list called 'results'
assert (len(results) == 4850)

In [17]:
# make sure the results are not float numbers, but intergers 0 and 1
results = [int(x) for x in results]

In [18]:
# write your prediction results to 'upload_predictions.txt' and upload that later
with open('upload_predictions.txt', 'w', encoding = 'utf-8') as fp:
    for x in results:
        fp.write(str(x) + '\n')