In [39]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from transformers import AutoModel, AutoTokenizer, DataCollatorForTokenClassification, AutoModelForTokenClassification, TrainingArguments, Trainer
import torch
import torch.nn as nn
import pickle
from helper import get_labeled_answers, get_predicted_answers

In [40]:
# Load the tokenizer of the 'KB/bert-base-swedish-cased' checkpoint; it is used
# further down to turn stored input ids back into readable tokens/text.
tokenizer = AutoTokenizer.from_pretrained('KB/bert-base-swedish-cased')

In [41]:
# SECURITY NOTE: torch.load and pickle.load both unpickle arbitrary Python
# objects and can execute code while doing so. Only load these artifacts when
# they come from a trusted source.
#
# map_location='cpu' makes the load independent of the device the model was
# saved from; the evaluation code in this notebook builds its input tensors
# with plain torch.tensor(...), i.e. on the CPU, so the model must live there.
model = torch.load('../results/model_CA_3e_ISNS_weighted_loss.pkl', map_location='cpu')
with open(r'../data/CA/tokenized_CA_data_eval.pkl', "rb") as input_file:
    test_data = pickle.load(input_file)

# Sanity check: show the first evaluation example both as word pieces and as
# decoded text.
tokens = tokenizer.convert_ids_to_tokens(test_data[0]["input_ids"])
print(tokens)
dec = tokenizer.decode(test_data[0]["input_ids"])
print(dec)

['[CLS]', 'Eget', 'företag', 'Efter', 'beslut', 'Beslutet', 'skickas', 'till', 'den', 'ambassad', 'eller', 'generalkonsul', '##at', 'som', 'du', 'valde', 'i', 'webb', '##ansökan', '.', 'När', 'du', 'ska', 'hämta', 'ditt', 'beslut', 'ska', 'du', 'ta', 'med', 'ditt', 'pass', '.', 'Du', 'kan', 'få', 'uppehåll', '##stillstånd', 'för', 'två', 'år', 'men', 'aldrig', 'längre', 'än', 'ditt', 'pass', 'är', 'giltigt', '.', 'Om', 'du', 'får', 'uppehåll', '##stillstånd', 'för', 'mer', 'än', 'tre', 'månader', 'får', 'du', 'ett', 'uppehåll', '##stillstånd', '##skort', '.', 'Kort', '##et', 'är', 'ett', 'bevis', 'på', 'att', 'du', 'har', 'tillstånd', 'att', 'vara', 'i', 'Sverige', 'och', 'innehåller', 'bland', 'annat', 'dina', 'fingeravtryck', 'och', 'foto', 'på', 'dig', '.', 'Uppehåll', '##stillstånd', '##skort', '##et', 'tillverkas', 'i', 'samband', 'med', 'att', 'beslutet', 'fattas', ',', 'dock', 'tidigast', 'tre', 'månader', 'innan', 'uppehåll', '##stillståndet', 'börjar', 'gälla', '.', 'Det', 'gå

In [42]:
# TODO: check why this function cannot be exported!!
def compare_labels_and_output(output, labels, tokens):
    """Count labeled answer spans and how many of them the output reproduces exactly.

    A span starts at every position whose label is 1 and extends over the
    directly following positions whose label is 2 or -100.

    Args:
        output: indexable sequence of predicted labels, aligned with ``labels``.
        labels: indexable sequence of reference labels (1 starts a span,
            2 / -100 continue it).
        tokens: indexable sequence of tokens, aligned with ``labels``; only
            used to print the text of matched spans.

    Returns:
        Tuple ``(number_of_labeled_spans, number_of_exactly_matched_spans)``.

    Side effects:
        Prints the list of ``(start, length)`` spans and the text of every
        matched span.
    """
    # Label value that marks positions without a real target; a predicted
    # class id can never equal it, so such positions are skipped when comparing.
    ignore_label = -100

    num_labels = len(labels)
    labels_stats = []
    for idx, label in enumerate(labels):
        if label == 1:
            count = 1
            # Check the bound BEFORE indexing so a span that reaches the end of
            # the sequence does not raise IndexError.
            while idx + count < num_labels and labels[idx + count] in (2, ignore_label):
                count += 1
            labels_stats.append((idx, count))
    print(labels_stats)

    num_matches = 0
    for ans_start, ans_len in labels_stats:
        is_match = True
        answer_text = []
        for pos in range(ans_start, ans_start + ans_len):
            answer_text.append(tokens[pos])
            if labels[pos] == ignore_label:
                # No reference label here; it must not veto an exact match.
                continue
            if output[pos] != labels[pos]:
                is_match = False
        if is_match:
            num_matches += 1
            print('found match: ', ' '.join(answer_text))
    return len(labels_stats), num_matches


In [43]:
# Output class
# https://huggingface.co/docs/transformers/main_classes/output#transformers.modeling_tf_outputs.TFTokenClassifierOutput

# Evaluate the loaded model on every example of test_data, one example per
# forward pass, and accumulate:
#   * token-level precision / recall over the positive (> 0) labels
#   * the share of labeled answer spans that are predicted exactly
model.eval()

# Per-field views of the evaluation set; printed below as a size sanity check.
test_input = [test_data[i]['input_ids'] for i in range(len(test_data))]
test_labels = [test_data[i]['labels'] for i in range(len(test_data))]
test_attn = [test_data[i]['attention_mask'] for i in range(len(test_data))]
token_type_ids = [test_data[i]['token_type_ids'] for i in range(len(test_data))]

print(len(test_input))
print(len(test_labels))
print(len(test_attn))

num_correct = 0    # predicted positive AND equal to the reference label
num_predicted = 0  # predicted positive (> 0)
num_pos_data = 0   # reference label positive (> 0)

total_num_answers = 0
total_exact_match = 0
for i in range(len(test_data)):
    example = test_data[i]  # fetch once instead of re-indexing for every field

    # Inference only: no_grad avoids building an autograd graph per example.
    with torch.no_grad():
        output = model(
            torch.tensor([example['input_ids']]),
            attention_mask=torch.tensor([example['attention_mask']]),
            token_type_ids=torch.tensor([example['token_type_ids']]),
            labels=torch.tensor([example['labels']]),
        )
    print('test idx: ', i)
    print('instance loss: ', output.loss)

    # Softmax is monotonic, so argmax over the raw logits yields the same
    # class ids; this also avoids shadowing the builtin `max`.
    out = torch.argmax(output.logits, dim=2)
    # NOTE(review): despite the label, this prints the predicted label ids of
    # the single example in the batch, not a length.
    print('Output length: ', out[0])

    tokens = tokenizer.convert_ids_to_tokens(example["input_ids"])
    true_labels = example['labels']
    get_labeled_answers(true_labels, tokens)
    get_predicted_answers(out[0], tokens)
    num_answers, num_exact_matches = compare_labels_and_output(out[0], true_labels, tokens)
    total_num_answers += num_answers
    total_exact_match += num_exact_matches

    # Token-level counts; .tolist() gives plain ints instead of 0-d tensors.
    for pred_label, true_label in zip(out[0].tolist(), true_labels):
        if true_label > 0:
            num_pos_data += 1
        if pred_label > 0:
            num_predicted += 1
            if pred_label == true_label:
                num_correct += 1

# calculate precision and recall (0.0 when the denominator is empty, instead
# of raising ZeroDivisionError)
pr = num_correct / num_predicted if num_predicted else 0.0
rec = num_correct / num_pos_data if num_pos_data else 0.0
print('precision: ', pr)
print('recall: ', rec)

exact_match_ratio = total_exact_match / total_num_answers if total_num_answers else 0.0
print('percentage of exact matches: ', exact_match_ratio)


98
98
98
test idx:  0
instance loss:  tensor(0.1984, grad_fn=<NllLossBackward>)
Output length:  tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0