In [19]:
import datasets

#import dataset
location = "../Final_dataset/"
test_file = location + "test_atomic.tsv"

# Read the tab-separated test file, rename the gold column from "label" to
# "labels", then keep the "train" entry of the loaded result as `test`.
test = (
	datasets.load_dataset("csv", data_files=test_file, delimiter="\t")
	.rename_column("label", "labels")
)["train"]

#special tokens that mark the premise / hypothesis segments of a model input
PREMISE_SPECIAL = "[PREMISE]"
HYPOTHESIS_SPECIAL = "[HYPOTHESIS]"

#special tokens that introduce the additional (atomic) description of each segment
PREMISE_ADDITIONAL_DESCRIPTION = "[PREMISE_ADDITIONAL_DESCRIPTION]"
HYPOTHESIS_ADDITIONAL_DESCRIPTION = "[HYPOTHESIS_ADDITIONAL_DESCRIPTION]"

#class id -> label name
id2label = {
	0: "entailment",
	1: "neutral",
	2: "contradiction",
}
#label name -> class id, derived from id2label so the two mappings cannot drift apart
label2id = {name: class_id for class_id, name in id2label.items()}

#change the labels to integers
def _label_to_id(example):
	"""Return the row's "labels" value replaced by its integer id from `label2id`."""
	return {"labels": label2id[example["labels"]]}

test = test.map(_label_to_id)

In [20]:
#import finetuned SloBert model
from transformers import AutoModelForSequenceClassification, AutoTokenizer, BatchEncoding
import tensorflow as tf
import torch

model_dir_normal = "train/sloberta/sloberta-finetuned"
model_dir_atomic = "train/sloberta/checkpoint-1500"

#gpu
device = "cuda" if torch.cuda.is_available() else "cpu"


def _load_classifier(model_dir):
	"""Load the tokenizer and sequence-classification model stored in `model_dir`.

	Args:
		model_dir: Directory passed to both `from_pretrained` calls.

	Returns:
		A `(tokenizer, model)` tuple; the model has been moved to `device` and
		switched to evaluation mode.
	"""
	# `batched` is a `Dataset.map` option, not a tokenizer-loading one, so it
	# is intentionally not passed to `from_pretrained`.
	loaded_tokenizer = AutoTokenizer.from_pretrained(model_dir)
	loaded_model = AutoModelForSequenceClassification.from_pretrained(model_dir)
	loaded_model.to(device)
	# This notebook only runs inference; make the evaluation mode explicit.
	loaded_model.eval()
	return loaded_tokenizer, loaded_model


tokenizer, model = _load_classifier(model_dir_normal)
tokenizer_atomic, model_atomic = _load_classifier(model_dir_atomic)

#PREPROCESS FUNCTION
def preprocess_function(examples):
	"""Encode one premise/hypothesis row for the plain (non-atomic) model.

	`examples` is used as a single row: its "premise" and "hypothesis" values
	are placed after the PREMISE_SPECIAL and HYPOTHESIS_SPECIAL markers, the
	resulting text is tokenized with `tokenizer` into PyTorch tensors, and the
	encoding is returned after being moved to `device`. Labels are not added
	to the encoding.
	"""
	premise = examples["premise"]
	hypothesis = examples["hypothesis"]
	text = f"{PREMISE_SPECIAL} {premise} {HYPOTHESIS_SPECIAL} {hypothesis}"

	encoding = tokenizer(text, truncation=True, padding=True, return_tensors="pt")
	return encoding.to(device)

def preprocess_function_atomic(example):
	"""Encode one row, including its atomic descriptions, for the atomic model.

	The text is built in this order: PREMISE_SPECIAL + premise,
	PREMISE_ADDITIONAL_DESCRIPTION + premise_atomic, HYPOTHESIS_SPECIAL +
	hypothesis, HYPOTHESIS_ADDITIONAL_DESCRIPTION + hypothesis_atomic. It is
	tokenized with `tokenizer_atomic` into PyTorch tensors and the encoding is
	returned after being moved to `device`. Labels are not added to the
	encoding.
	"""
	premise = example["premise"]
	premise_extra = example["premise_atomic"]
	hypothesis = example["hypothesis"]
	hypothesis_extra = example["hypothesis_atomic"]

	premise_part = f"{PREMISE_SPECIAL} {premise} {PREMISE_ADDITIONAL_DESCRIPTION} {premise_extra}"
	hypothesis_part = f"{HYPOTHESIS_SPECIAL} {hypothesis} {HYPOTHESIS_ADDITIONAL_DESCRIPTION} {hypothesis_extra}"
	text = f"{premise_part} {hypothesis_part}"

	encoding = tokenizer_atomic(text, truncation=True, padding=True, return_tensors="pt")
	return encoding.to(device)

#preprocess the data
# Rows are encoded one at a time: each preprocess function formats a single
# row into one text and returns one encoding for it.
test_prep = []
test_prep_atomic = []
for row_index in range(len(test)):
	row = test[row_index]
	test_prep.append(preprocess_function(row))
	test_prep_atomic.append(preprocess_function_atomic(row))

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [21]:
#evaluate the model
answers = []
answers_atomic = []

# Inference only: without no_grad() every forward pass records an autograd
# graph (the returned logits carry a grad_fn), which costs memory and time
# that evaluation never uses.
with torch.no_grad():
	for i, (encoded, encoded_atomic) in enumerate(zip(test_prep, test_prep_atomic)):
		# Each encoding holds one example, so the argmax over the class
		# dimension yields a single prediction; take that one value.
		outputs = model(**encoded)
		predictions = torch.argmax(outputs.logits, dim=1)
		answers.append(predictions.tolist()[0])

		outputs_atomic = model_atomic(**encoded_atomic)
		predictions_atomic = torch.argmax(outputs_atomic.logits, dim=1)
		answers_atomic.append(predictions_atomic.tolist()[0])

		#progress
		if i % 100 == 0:
			print(f"Progress: {i}/{len(test_prep)}")

Progress: 0/547
Progress: 100/547
Progress: 200/547
Progress: 300/547
Progress: 400/547
Progress: 500/547


In [22]:
def compute_accuracy(predictions, references=None):
	"""Return the fraction of predictions that equal their gold label.

	Args:
		predictions: Sequence of predicted class ids, aligned by position with
			the gold labels.
		references: Optional sequence of gold labels. When omitted, the labels
			are read row by row from the "labels" field of the module-level
			`test` dataset, as before.

	Returns:
		The accuracy as a float; 0.0 when `predictions` is empty.

	Raises:
		ValueError: If `references` is given but has fewer entries than
			`predictions`.
	"""
	total = len(predictions)
	if total == 0:
		# Nothing to score; avoid the ZeroDivisionError of a bare division.
		return 0.0

	if references is None:
		references = [test[i]["labels"] for i in range(total)]
	elif len(references) < total:
		raise ValueError(
			f"got {total} predictions but only {len(references)} references"
		)

	# Extra trailing references are ignored, matching the original behaviour
	# of only comparing the first len(predictions) rows.
	correct = sum(1 for predicted, gold in zip(predictions, references) if predicted == gold)
	return correct / total

# Score the predictions of both models against the gold labels and report them.
accuracy, accuracy_atomic = compute_accuracy(answers), compute_accuracy(answers_atomic)
print(f"Accuracy: {accuracy}")
print(f"Accuracy Atomic: {accuracy_atomic}")

[1, 2, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 2, 1, 2, 2, 2, 1, 2, 2, 1, 1, 2, 2, 1, 1, 1, 2, 1, 2, 2, 2, 1, 1, 2, 2, 2, 2, 1, 2, 0, 2, 1, 1, 2, 2, 2, 1, 1, 2, 2, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 1, 2, 1, 1, 2, 2, 0, 1, 2, 2, 2, 2, 2, 1, 2, 1, 1, 1, 1, 0, 2, 2, 1, 1, 1, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 2, 2, 2, 2, 2, 1, 2, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 2, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 2, 1, 2, 1, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 2, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 2, 1, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 2, 1, 0, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 0, 2, 1, 2, 1, 2, 1, 2, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 

In [23]:
#atomic
# Sanity probe: feed nonsense text to both models and print the raw outputs.
# The encodings get their own names so the `test` dataset, which
# compute_accuracy reads, is not overwritten by this cell.
probe_atomic = tokenizer_atomic("we'sopfsdffs 'čldskfčdlkdsfč'dfšosdfs podfsf", return_tensors="pt").to(device)
probe_normal = tokenizer("qćććććsadsdasdeqweqwewqwe adflskjčs eqweqweeesfsdlkfčsdf sdšofksdfeee", return_tensors="pt").to(device)

# Inference only: skip building an autograd graph for these forward passes.
with torch.no_grad():
	outputs = model_atomic(**probe_atomic)
	outputs2 = model(**probe_normal)
print(outputs)
print(outputs2)

SequenceClassifierOutput(loss=None, logits=tensor([[0.0971, 0.0144, 0.0744]], device='cuda:0', grad_fn=<AddmmBackward0>), hidden_states=None, attentions=None)
SequenceClassifierOutput(loss=None, logits=tensor([[ 0.1926,  0.2375, -0.5317]], device='cuda:0',
       grad_fn=<AddmmBackward0>), hidden_states=None, attentions=None)
