In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from transformers import AutoModel, AutoTokenizer, DataCollatorForTokenClassification, AutoModelForTokenClassification, TrainingArguments, Trainer
import torch
import torch.nn as nn
import pickle

In [2]:
# Hub identifier of the pretrained checkpoint whose tokenizer is loaded below.
TOKENIZER_CHECKPOINT = 'KB/bert-base-swedish-cased'
tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_CHECKPOINT)

In [3]:
# Load the fine-tuned model object and the pickled, pre-tokenized evaluation set.
#
# map_location='cpu': the evaluation cell builds its inputs with torch.tensor(...)
# on the CPU, so the model must live on the CPU as well. Without it, a checkpoint
# that was pickled from a GPU is restored onto CUDA and fails on a CPU-only host
# (or on the first forward pass because of a device mismatch).
#
# SECURITY: torch.load (on a whole pickled model) and pickle.load both execute
# arbitrary code embedded in the file -- only load files you produced yourself.
# NOTE(review): PyTorch >= 2.6 defaults torch.load to weights_only=True, which
# rejects a fully pickled model object; pass weights_only=False on such versions.
model = torch.load(
    './results/model_CAR_5_epochs_weighted_loss.pkl',
    map_location='cpu',
)
with open(r'./data/tokenized_CAR_data_eval.pkl', "rb") as input_file:
    test_data = pickle.load(input_file)

# Sanity check: show the first instance both as word pieces and as decoded text.
tokens = tokenizer.convert_ids_to_tokens(test_data[0]["input_ids"])
print(tokens)
dec = tokenizer.decode(test_data[0]["input_ids"])
print(dec)

['[CLS]', 'Universitet', '##s', '-', 'och', 'högskole', '##lärare', 'Det', 'finns', 'universitets', '-', 'och', 'högskole', '##lärare', 'som', 'huvudsakligen', 'arbetar', 'med', 'undervisning', '.', 'Men', 'det', 'finns', 'också', 'de', 'som', 'nästan', 'enbart', 'ägnar', 'sig', 'åt', 'forskning', '.', 'Arbetsuppgifter', 'I', 'jobbet', 'som', 'universitets', '-', 'och', 'högskole', '##lärare', 'ingår', 'att', 'utveckla', 'pedagogik', '##en', 'och', 'planera', 'utbildningen', '.', 'Universitet', '##s', '-', 'och', 'högskole', '##lärare', 'handled', '##er', 'studenterna', 'i', 'deras', 'arbete', 'och', 'deras', 'studier', 'ska', 'bedömas', 'och', 'exam', '##iner', '##as', '.', 'Universitet', '##s', '-', 'och', 'högskole', '##lärare', 'kan', 'ha', 'många', 'olika', 'titlar', ',', 'till', 'exempel', 'doktorand', ',', 'universitets', '##adjunkt', ',', 'universitets', '##lektor', 'eller', 'professor', '.', 'Doktor', '##and', '.', 'I', 'en', 'doktorand', '##tjänst', 'kombineras', 'ofta', 'ege

In [4]:
# Evaluate the loaded token classifier on every instance of `test_data` and report
# precision / recall over the positive (label > 0) classes, pooled over all tokens.
# Output class (the model call returns an object exposing `.loss` and `.logits`):
# https://huggingface.co/docs/transformers/main_classes/output#transformers.modeling_outputs.TokenClassifierOutput

# Default `ignore_index` of torch.nn.CrossEntropyLoss.
# NOTE(review): assumes positions that should not be scored carry this label --
# confirm against the tokenization step. If no label equals it, the filter below
# is a no-op and the counts match a plain per-token comparison.
IGNORE_LABEL = -100


def count_positive_matches(pred_labels, true_labels, attention_mask):
    """Count correct, predicted and gold positive tokens for one batch.

    Args:
        pred_labels: integer tensor of predicted class ids.
        true_labels: integer tensor of gold class ids, same shape as `pred_labels`.
        attention_mask: tensor of the same shape; 0 marks padded positions.

    Returns:
        A tuple ``(num_correct, num_predicted, num_gold)`` of Python ints where
        ``num_predicted`` counts scored positions with a predicted label > 0,
        ``num_gold`` counts scored positions with a gold label > 0, and
        ``num_correct`` counts predicted positives whose label equals the gold one.
        Positions that are padded or labelled ``IGNORE_LABEL`` are not scored.
    """
    scored = (true_labels != IGNORE_LABEL) & (attention_mask != 0)
    predicted_pos = scored & (pred_labels > 0)
    gold_pos = scored & (true_labels > 0)
    correct = predicted_pos & (pred_labels == true_labels)
    return int(correct.sum()), int(predicted_pos.sum()), int(gold_pos.sum())


model.eval()  # inference mode: disables dropout etc.

# Column-wise views of the evaluation set (kept as notebook-level variables).
test_input = [test_data[i]['input_ids'] for i in range(len(test_data))]
test_labels = [test_data[i]['labels'] for i in range(len(test_data))]
test_attn = [test_data[i]['attention_mask'] for i in range(len(test_data))]
token_type_ids = [test_data[i]['token_type_ids'] for i in range(len(test_data))]

print('Num test instances: ', len(test_input))
num_correct = 0
num_predicted = 0
num_pos_data = 0
for i in range(len(test_data)):
    example = test_data[i]
    # Wrap each field in a list to add the batch dimension (batch size 1).
    input_ids = torch.tensor([example['input_ids']])
    attention_mask = torch.tensor([example['attention_mask']])
    segment_ids = torch.tensor([example['token_type_ids']])
    labels = torch.tensor([example['labels']])

    # No gradients are needed for evaluation; skipping the autograd graph saves
    # memory and time on every forward pass.
    with torch.no_grad():
        output = model(
            input_ids,
            attention_mask=attention_mask,
            token_type_ids=segment_ids,
            labels=labels,
        )
    print('test idx: ', i)
    print('instance loss: ', output.loss)

    # argmax over the class dimension; a softmax first would not change the
    # winner because it is monotonic.
    out = torch.argmax(output.logits, dim=2)
    print('Output: ', out)

    correct, predicted, gold = count_positive_matches(out, labels, attention_mask)
    num_correct += correct
    num_predicted += predicted
    num_pos_data += gold

# calculate precision and recall (defined as 0.0 when the denominator is empty,
# instead of raising ZeroDivisionError)
pr = num_correct / num_predicted if num_predicted else 0.0
rec = num_correct / num_pos_data if num_pos_data else 0.0
print('precision: ', pr)
print('recall: ', rec)


97
97
97
test idx:  0
instance loss:  tensor(0.5909, grad_fn=<NllLossBackward>)
Output:  tensor([[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
         2, 0, 1, 0, 0, 0, 0, 0, 2, 2, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
         2, 2, 2, 2,