# Testing DistilBERT fine-tuned on SST-2

In [1]:
import torch
from transformers import DistilBertTokenizer, DistilBertForSequenceClassification
from datasets import load_dataset
import numpy as np
import evaluate

In [2]:
# Hub checkpoint shared by the tokenizer and the classifier, so the two
# can never drift apart.
MODEL_NAME = "distilbert-base-uncased-finetuned-sst-2-english"

tokenizer = DistilBertTokenizer.from_pretrained(MODEL_NAME)
model = DistilBertForSequenceClassification.from_pretrained(MODEL_NAME)


In [3]:
# Evaluation data lives in a local CSV file. The rest of the notebook reads
# it back through the 'train' split and its 'body' / 'label' columns.
TEST_CSV = "books_test.csv"
dataset_test = load_dataset(path="csv", data_files=TEST_CSV)

In [4]:
# Materialize the text column as a plain list before tokenizing. Depending on
# the installed `datasets` version, column access may return a lazy column
# object rather than a list, which the tokenizer may not accept as a batch.
# On versions that already return a list this is a harmless copy.
texts = list(dataset_test['train']['body'])

# Pad every example to the longest one and truncate over-long inputs so the
# whole split is encoded as one rectangular batch of PyTorch tensors.
inputs = tokenizer(texts, truncation=True, padding=True, return_tensors="pt")

In [5]:
# Inference only: run the forward pass over the tokenized batch with
# autograd disabled, since no gradients are needed for evaluation.
with torch.no_grad():
    outputs = model(**inputs)

# Raw classifier scores; the predicted class is taken from these later.
logits = outputs.logits

In [6]:
# Metric implementations from the `evaluate` library.
load_accuracy = evaluate.load("accuracy")
load_f1 = evaluate.load("f1")

# Gold labels as a plain Python list (column access may be lazy in some
# `datasets` versions; list() is a no-op copy otherwise).
labels = list(dataset_test['train']['label'])

# Predicted class = index of the largest logit along the last axis.
# Use the tensor's own argmax and convert to a list of ints instead of
# passing a torch tensor through NumPy, which relies on implicit interop
# and only works for CPU tensors.
predictions = logits.argmax(dim=-1).tolist()

accuracy = load_accuracy.compute(predictions=predictions, references=labels)
f1 = load_f1.compute(predictions=predictions, references=labels)

# Each compute() call returns its own dict, so `metrics` is nested:
# {'accuracy': {'accuracy': ...}, 'f1': {'f1': ...}}.
metrics = {"accuracy": accuracy, "f1": f1}

In [7]:
# Show the collected scores; `metrics` maps each metric name to the dict
# returned by that metric's compute() call, hence the nested output.
print(metrics)

{'accuracy': {'accuracy': 0.79}, 'f1': {'f1': 0.832}}


### Result: the accuracy is 0.79 and the F1 score is 0.832