In [6]:
import numpy as np
import pandas as pd
import glob
import os
import tensorflow as tf
import transformers
from transformers import TFBertForTokenClassification
from tqdm.notebook import tqdm

import sys
sys.path.append("..")
from data_preparation.data_preparation_pos import ABSATokenizer, convert_examples_to_tf_dataset, read_conll

## Zero-shot

In [2]:
import tensorflow.keras.backend as K
def ignore_acc(y_true_class, y_pred_class, class_to_ignore=0):
    """Accuracy metric that leaves out positions labelled ``class_to_ignore``.

    Args:
        y_true_class: tensor of gold class ids (cast to int32 here).
        y_pred_class: tensor of per-class scores; the argmax over the last axis is
            taken as the predicted class id.
        class_to_ignore: gold class id whose positions are excluded from both the
            numerator and the denominator (default 0).

    Returns:
        Number of correct, non-ignored positions divided by the number of non-ignored
        positions. The denominator is clamped to at least 1, so an input in which
        every position is ignored gives 0 instead of dividing by zero.
    """
    true_ids = K.cast(y_true_class, 'int32')
    pred_ids = K.cast(K.argmax(y_pred_class, axis=-1), 'int32')
    # 1 where the gold label counts towards the metric, 0 where it is ignored
    keep = K.cast(K.not_equal(true_ids, class_to_ignore), 'int32')
    hits = keep * K.cast(K.equal(true_ids, pred_ids), 'int32')
    n_kept = K.maximum(K.sum(keep), 1)
    return K.sum(hits) / n_kept

In [3]:
# Label inventory of the tagger. A tag's position in this list is its integer class id
# (see label_map below); index 0 ("O") is the id that ignore_acc skips by default.
tagset = ["O", "_", "ADJ", "ADP", "ADV", "AUX", "CCONJ", "DET", "INTJ", "NOUN", "NUM", 
          "PART", "PRON", "PROPN", "PUNCT", "SCONJ", "SYM", "VERB", "X"]
# Tokenizer, config and base weights all come from the same pretrained checkpoint name.
tokenizer = ABSATokenizer.from_pretrained('bert-base-multilingual-cased')
num_labels = len(tagset)
label_map = {label: i for i, label in enumerate(tagset)}
config = transformers.BertConfig.from_pretrained('bert-base-multilingual-cased', num_labels=num_labels)
model = TFBertForTokenClassification.from_pretrained('bert-base-multilingual-cased',
                                                     config=config)
# Replace the weights with a saved checkpoint
# (the "checkpoints_vi" directory name suggests a Vietnamese-trained tagger — TODO confirm).
model.load_weights("../checkpoints_vi/multibert_pos_0.861_256.hdf5")
# from_logits=True tells the loss that the model outputs are unnormalised scores.
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
# No optimizer is passed; the cells visible here only call predict/evaluate on the model.
model.compile(loss=loss, metrics=[ignore_acc])

Some weights of the model checkpoint at bert-base-multilingual-cased were not used when initializing TFBertForTokenClassification: ['nsp___cls', 'mlm___cls']
- This IS expected if you are initializing TFBertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing TFBertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of TFBertForTokenClassification were not initialized from the model checkpoint at bert-base-multilingual-cased and are newly initialized: ['dropout_37', 'classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [4]:
def load_data(path, batch_size):
    """Loads conllu file, returns a list of dictionaries (one for each sentence) and a TF dataset

    Args:
        path: directory that directly contains a file matching ``*-test.conllu``.
        batch_size: batch size applied to the returned dataset.

    Returns:
        ``(test_examples, test_dataset)``: ``test_examples`` is a list of
        ``{"id", "tokens", "tags"}`` dicts built from the first three items returned by
        ``read_conll``; ``test_dataset`` is the output of
        ``convert_examples_to_tf_dataset`` (max_length=256) batched with ``batch_size``.

    Raises:
        FileNotFoundError: if ``path`` contains no ``*-test.conllu`` file.
    """
    # Sort so the choice is deterministic if a directory ever holds several test files.
    matches = sorted(glob.glob(os.path.join(path, "*-test.conllu")))
    if not matches:
        # Previously this surfaced as a bare IndexError from `[0]`.
        raise FileNotFoundError("No '*-test.conllu' file found in {!r}".format(path))

    test_data = read_conll(matches[0])
    test_examples = [
        {"id": sent_id, "tokens": tokens, "tags": tags}
        for sent_id, tokens, tags in zip(test_data[0], test_data[1], test_data[2])
    ]
    test_dataset = convert_examples_to_tf_dataset(examples=test_examples, tokenizer=tokenizer, tagset=tagset, max_length=256)
    test_dataset = test_dataset.batch(batch_size)
    return test_examples, test_dataset

In [5]:
def filter_padding_tokens(test_examples, preds, label_map):
    """Filters padding tokens, labels, predictions and logits, then returns these as flattened lists

    Each example is re-tokenized with the module-level ``tokenizer`` to find out how many
    sub-word positions it occupies; only that many leading positions of the model
    output are kept for it.

    Args:
        test_examples: list of dicts with at least the keys ``"tokens"`` and ``"tags"``.
        preds: model output; ``preds[0]`` must be an array indexable as
            ``[example, position, class]``.
        label_map: mapping from tag string to integer class id.

    Returns:
        ``(tokens, labels, filtered_preds, logits)``: four flat lists with one entry per
        kept sub-word position (sub-word string, gold class id, predicted class id,
        per-class score vector).
    """
    if not test_examples:
        return [], [], [], []

    all_logits = preds[0]
    # Computed once: the argmax over the whole array does not depend on the example.
    all_preds = all_logits.argmax(axis=-1)
    seq_len = all_logits.shape[1]

    filtered_preds = []
    labels = []
    tokens = []
    logits = []

    for i, example in enumerate(test_examples):
        example_tokens, example_labels, _ = tokenizer.subword_tokenize(example["tokens"], example["tags"])
        # A sentence with more sub-words than the model's sequence length only has
        # `seq_len` predictions; cut labels/tokens to the same length so the four
        # output lists stay aligned.
        # Assumes subword_tokenize returns equally long token and label lists — TODO confirm.
        n_kept = min(len(example_labels), seq_len)
        labels.extend(label_map[label] for label in example_labels[:n_kept])
        tokens.extend(example_tokens[:n_kept])
        filtered_preds.extend(all_preds[i, :n_kept])
        logits.extend(all_logits[i, :n_kept])

    return tokens, labels, filtered_preds, logits

In [6]:
def find_subword_locations(tokens):
    """Finds the starting and ending index of words that have been broken into subwords

    A word split into several pieces appears as one token followed by one or more
    tokens that start with ``"##"``.

    Args:
        tokens: flat list of sub-word strings.

    Returns:
        List of ``(start, end)`` pairs such that ``tokens[start:end]`` holds all pieces
        of one split word (``end`` is exclusive). Words that were not split are not
        listed.
    """
    subword_locations = []
    start = None  # first index of the group currently being collected, if any

    for i, token in enumerate(tokens):
        if token.startswith("##"):
            if start is None:
                # The group began at the previous token. max() covers a continuation
                # piece at position 0, where i - 1 would wrap around to the list end.
                start = max(i - 1, 0)
        elif start is not None:
            subword_locations.append((start, i))
            start = None

    # A group that runs up to the end of the list has no following token to close it.
    if start is not None:
        subword_locations.append((start, len(tokens)))

    return subword_locations

In [7]:
def reconstruct_subwords(subword_locations, tokens, labels, filtered_preds, logits):
    """Assemble subwords back into the original word in the global lists of tokens, labels and predictions,
    and select a predicted tag

    Args:
        subword_locations: ``(start, end)`` pairs (end exclusive), in increasing order,
            marking the pieces of each split word.
        tokens: flat list of sub-word strings.
        labels: flat list of gold class ids, aligned with ``tokens``.
        filtered_preds: flat list of predicted class ids, aligned with ``tokens``.
        logits: flat list of per-class score arrays, aligned with ``tokens``.

    Returns:
        ``(new_tokens, new_labels, new_preds)`` with one entry per word. For a merged
        word the token is the concatenation of its pieces with every ``"##"`` removed,
        the label is the label of its first piece, and the prediction is the common
        prediction of its pieces or, when they disagree, the class chosen by the piece
        with the highest single score.
    """
    new_tokens = []
    new_preds = []
    new_labels = []
    prev_end = 0

    for start, end in subword_locations:
        if len(set(filtered_preds[start:end])) > 1:
            # Subword predictions do not all agree: take the class of the piece whose
            # top score is highest (first such piece on ties). The class id is kept
            # an integer rather than being round-tripped through a float array.
            most_confident = max(logits[start:end], key=lambda piece_logits: piece_logits.max())
            prediction = int(most_confident.argmax())
        else:
            prediction = filtered_preds[start]

        # Copy the untouched words since the previous group, then the merged word.
        new_preds.extend(filtered_preds[prev_end:start])
        new_preds.append(prediction)
        new_tokens.extend(tokens[prev_end:start])
        new_tokens.append("".join(tokens[start:end]).replace("##", ""))
        new_labels.extend(labels[prev_end:start])
        new_labels.append(labels[start])
        prev_end = end

    # Last subword onwards
    new_preds.extend(filtered_preds[prev_end:])
    new_tokens.extend(tokens[prev_end:])
    new_labels.extend(labels[prev_end:])

    return new_tokens, new_labels, new_preds

In [8]:
# Zero-shot evaluation: score the loaded tagger on the test split of every language
# directory under data_dir and store the word-level accuracy per language.
data_dir = "../data/ud/"
batch_size = 256 # Doesn't really matter here
pos_eval = {}
# sorted() makes the order of the results independent of the file system's listing order.
for directory in tqdm(sorted(os.listdir(data_dir))):
    path = os.path.join(data_dir, directory)
    if not os.path.isdir(path):
        # Stray files next to the language directories would make load_data fail.
        continue
    test_examples, test_dataset = load_data(path, batch_size)
    # `steps` is passed as a plain int (np.ceil alone returns a float).
    preds = model.predict(test_dataset, steps=int(np.ceil(len(test_examples) / batch_size)))
    tokens, labels, filtered_preds, logits = filter_padding_tokens(test_examples, preds, label_map)
    subword_locations = find_subword_locations(tokens)
    new_tokens, new_labels, new_preds = reconstruct_subwords(subword_locations, tokens, labels, filtered_preds, logits)
    # Fraction of words whose selected tag equals the gold tag.
    pos_eval[directory] = (np.array(new_labels) == np.array(new_preds)).mean()

HBox(children=(FloatProgress(value=0.0, max=15.0), HTML(value='')))




In [9]:
# Show the per-language accuracies collected by the evaluation loop
# (keys are the sub-directory names of data_dir).
pos_eval  

{'ar': 0.592356994843622,
 'bg': 0.6243322309844823,
 'en': 0.5568952524491334,
 'eu': 0.5540329859686551,
 'fi': 0.5671528698617162,
 'he': 0.4644249190724714,
 'hr': 0.575886232481451,
 'ja': 0.44999438349496385,
 'ko': 0.5300289435600579,
 'ru': 0.6244252725112363,
 'sl': 0.5942585201105312,
 'th': 0.5106373449187083,
 'tr': 0.508383129314846,
 'vi': 0.8762649173005792,
 'zh': 0.5116974083586271}

In [10]:
# Same scores as percentages, best language first. A fixed two-decimal format keeps the
# values uniform ("55.40%" instead of "55.4%", which str(round(...)) produced).
{k: "{:.2f}%".format(v * 100) for k, v in sorted(pos_eval.items(), key=lambda item: item[1], reverse=True)}

{'vi': '87.63%',
 'ru': '62.44%',
 'bg': '62.43%',
 'sl': '59.43%',
 'ar': '59.24%',
 'hr': '57.59%',
 'fi': '56.72%',
 'en': '55.69%',
 'eu': '55.4%',
 'ko': '53.0%',
 'zh': '51.17%',
 'th': '51.06%',
 'tr': '50.84%',
 'he': '46.44%',
 'ja': '45.0%'}

Load the existing results Excel workbook, if there is one

In [11]:
results_path = "../results/results_pos.xlsx"

# sheet_name=None asks pandas for every sheet of the workbook at once;
# when no workbook exists yet, start from an empty collection of sheets.
results = pd.read_excel(results_path, sheet_name=None) if os.path.isfile(results_path) else {}

Check if the sheet already exists

In [12]:
sheet = "results_pos_vi"

# Only add the new sheet when its name is still free; otherwise stop, because saving
# would replace the sheet that is already stored under that name.
if sheet not in results:
    results[sheet] = pd.DataFrame({"Language": list(pos_eval), "Test_acc": list(pos_eval.values())})
else:
    raise Exception("Sheet already exists and would be overwritten, aborting")

Save all sheets to the Excel file

In [13]:
# Write every sheet to the workbook at results_path (the same path the results were
# loaded from, instead of a second copy of the literal).
results_dir = os.path.dirname(results_path)
if results_dir:
    # On a first run the output directory may not exist yet.
    os.makedirs(results_dir, exist_ok=True)
with pd.ExcelWriter(results_path) as writer:
    for sheet_name, df in results.items():
        df.to_excel(writer, index=False, sheet_name=sheet_name)

## Example

In [39]:
# Single-language example: read one test file and turn it into one dict per sentence.
test_data = read_conll("../data/ud/fi/fi_pud-ud-test.conllu")
test_examples = [
    dict(zip(("id", "tokens", "tags"), row))
    for row in zip(test_data[0], test_data[1], test_data[2])
]

In [58]:
# All languages
# Concatenate the test sentences of every language directory into one list.
path = "../data/ud/"
test_examples = []

for lang in os.listdir(path):
    test_data = read_conll(glob.glob(path + lang + "/*-test.conllu")[0])
    temp = [
        dict(zip(("id", "tokens", "tags"), row))
        for row in zip(test_data[0], test_data[1], test_data[2])
    ]
    test_examples += temp

In [60]:
# Convert the collected examples to a dataset and batch it in one chained expression.
batch_size = 256
test_dataset = convert_examples_to_tf_dataset(
    examples=test_examples,
    tokenizer=tokenizer,
    tagset=tagset,
    max_length=256,
).batch(batch_size)

In [64]:
# Peek at the first sentence of the first batch: one sub-word piece per line, next to its gold tag.
example_batch = next(test_dataset.as_numpy_iterator())
first_input_ids = example_batch[0]["input_ids"][0]
first_labels = example_batch[1][0]

for token, label in zip(first_input_ids, first_labels):
    # Stop at the first input id 0 (presumably padding — TODO confirm).
    if token == 0:
        break
    print("{:<25}{:<20}".format(tokenizer.decode(int(token)), tagset[label]))

ك ت ب                    VERB                
# # ت                    VERB                
ك                        PROPN               
# # و ر ي                PROPN               
ش                        PROPN               
# # و ل                  PROPN               
# # م ا ن                PROPN               
,                        PUNCT               
ا ل                      NOUN                
# # م س ا ع د ة          NOUN                
ا ل خ ا ص ة              ADJ                 
ل                        ADP                 
أ و                      PROPN               
# # ب ا                  PROPN               
# # م ا                  PROPN               
ف ي                      ADP                 
ت                        NOUN                
# # د و                  NOUN                
# # ي ن ة                NOUN                
ن ش ر                    VERB                
# # ت                    VERB                
ه ا                      PRON     

In [65]:
# Keras-level evaluation on the batched test set; the two values shown are presumably
# the compiled loss followed by the ignore_acc metric.
model.evaluate(test_dataset)



[0.2904379665851593, 0.6831367015838623]

In [66]:
# Raw model outputs for the whole test set; `steps` is the number of batches needed to
# cover every example.
# NOTE(review): np.ceil returns a float — confirm the installed Keras accepts a non-integer `steps`.
preds = model.predict(test_dataset, steps=np.ceil(len(test_examples) / batch_size), verbose=1)



In [67]:
# Strip padding and flatten tokens, gold labels, predictions and logits by calling the
# filter_padding_tokens helper defined earlier, rather than keeping a second inline copy
# of its body.
tokens, labels, filtered_preds, logits = filter_padding_tokens(test_examples, preds, label_map)

In [68]:
# Sub-word-level accuracy: share of kept positions whose predicted class id equals the gold id.
(np.array(labels) == np.array(filtered_preds)).mean()

0.6518227627224638

In [46]:
# Recompute accuracy batch by batch: walk through the dataset, take each batch's non-zero
# labels, and compare them with the matching stretch of the flat prediction list.
flattened, accuracies = [], []
iterator = test_dataset.as_numpy_iterator()
temp_preds = filtered_preds.copy()

for batch in iterator:
    batch_labels = batch[1][batch[1] != 0]
    flattened.extend(batch_labels)
    n_labels = len(batch_labels)
    # Split off the predictions belonging to this batch; the rest is kept for later batches.
    batch_preds, temp_preds = temp_preds[:n_labels], temp_preds[n_labels:]
    accuracies.append((np.array(batch_labels) == np.array(batch_preds)).mean())

In [47]:
# Unweighted mean of the per-batch accuracies (every batch counts equally, whatever its size).
np.mean(accuracies)

0.8352089574000712

In [48]:
# Sanity check: the labels rebuilt through the tokenizer should equal the non-zero labels
# read back from the dataset (1.0 means every position matches).
(np.array(labels) == np.array(flattened)).mean()

1.0

In [69]:
# Locate split words with the find_subword_locations helper defined earlier. The inline
# copy that used to live here lacked the helper's `i != 0` guard, so a list ending in a
# "##" piece could produce a spurious (None, 0) entry.
subword_locations = find_subword_locations(tokens)

HBox(children=(FloatProgress(value=0.0, max=526097.0), HTML(value='')))




In [70]:
# Compare several ways of choosing one tag for a word whose sub-word pieces received
# different predictions. Every list below gets one entry per such word, in the same order.
truths = []              # gold class id (label of the word's first piece)
final_most_voted = []    # class predicted by the largest number of pieces
final_avg = []           # argmax of the summed piece logits
final_first = []         # prediction of the first piece
final_random = []        # prediction of a uniformly drawn piece
final_max_prob = []      # class of the piece with the highest single score
final_random_equi = []   # uniformly drawn among the distinct predicted classes

# Private seeded generator: the two random baselines are reproducible and the global
# NumPy random state is left untouched.
rng = np.random.RandomState(0)

for start, end in subword_locations:
    piece_preds = filtered_preds[start:end]
    if len(set(piece_preds)) > 1:
        piece_logits = logits[start:end]
        # NOTE(review): this prints four lines per disagreeing word, which floods the
        # cell output on the full test set.
        print(start, end)
        print("Tokens:", tokens[start:end])
        print("Predictions:", piece_preds)
        print("Truth:", labels[start])
        truths.append(labels[start])

        most_voted = max(set(piece_preds), key=piece_preds.count)
        final_most_voted.append(most_voted)
        avg = sum(piece_logits).argmax()
        final_avg.append(avg)
        final_first.append(piece_preds[0])
        final_random.append(rng.choice(piece_preds))
        # Keep the class id an integer instead of reading it back out of a float array.
        most_confident = max(piece_logits, key=lambda piece: piece.max())
        final_max_prob.append(int(most_confident.argmax()))
        final_random_equi.append(rng.choice(list(set(piece_preds))))

HBox(children=(FloatProgress(value=0.0, max=127717.0), HTML(value='')))

8 10
Tokens: ['ال', '##مساعدة']
Predictions: [7, 9]
Truth: 9
Final prediction: 9 

23 26
Tokens: ['ال', '##إ', '##ثنين']
Predictions: [7, 9, 10]
Truth: 13
Final prediction: 9 

32 36
Tokens: ['ال', '##ان', '##ت', '##قال']
Predictions: [7, 9, 9, 9]
Truth: 9
Final prediction: 9 

50 54
Tokens: ['ال', '##ان', '##ت', '##قال']
Predictions: [7, 9, 9, 9]
Truth: 9
Final prediction: 9 

58 61
Tokens: ['ل', '##سل', '##طة']
Predictions: [3, 9, 9]
Truth: 9
Final prediction: 9 

70 73
Tokens: ['ال', '##نس', '##بة']
Predictions: [7, 9, 9]
Truth: 9
Final prediction: 9 

82 86
Tokens: ['ال', '##ان', '##ت', '##قال']
Predictions: [7, 9, 9, 9]
Truth: 9
Final prediction: 9 

107 110
Tokens: ['ال', '##شي', '##ء']
Predictions: [7, 9, 9]
Truth: 9
Final prediction: 9 

112 115
Tokens: ['ب', '##عيد', '##اً']
Predictions: [4, 2, 4]
Truth: 2
Final prediction: 4 

124 126
Tokens: ['ال', '##حد']
Predictions: [7, 9]
Truth: 9
Final prediction: 9 

127 130
Tokens: ['ال', '##ه', '##جرة']
Predictions: [7, 9, 9]
Truth: 

3730 3733
Tokens: ['ن', '##حت', '##اج']
Predictions: [5, 17, 17]
Truth: 17
Final prediction: 17 

3758 3761
Tokens: ['ال', '##حمل', '##ة']
Predictions: [7, 9, 9]
Truth: 9
Final prediction: 9 

3772 3774
Tokens: ['ي', '##قابل']
Predictions: [12, 17]
Truth: 17
Final prediction: 17 

3774 3776
Tokens: ['ال', '##طلاب']
Predictions: [7, 9]
Truth: 9
Final prediction: 9 

3797 3799
Tokens: ['ت', '##قول']
Predictions: [5, 17]
Truth: 17
Final prediction: 17 

3800 3803
Tokens: ['ال', '##سل', '##وى']
Predictions: [7, 13, 13]
Truth: 9
Final prediction: 13 

3804 3806
Tokens: ['ت', '##أتي']
Predictions: [5, 17]
Truth: 17
Final prediction: 17 

3842 3845
Tokens: ['ال', '##نس', '##بة']
Predictions: [7, 9, 9]
Truth: 9
Final prediction: 9 

3849 3854
Tokens: ['ال', '##مت', '##أ', '##لم', '##ين']
Predictions: [7, 9, 9, 9, 9]
Truth: 9
Final prediction: 9 

3863 3866
Tokens: ['ي', '##مل', '##ك']
Predictions: [5, 17, 17]
Truth: 17
Final prediction: 17 

3866 3870
Tokens: ['ال', '##أ', '##طب', '##اء']
Pred

Tokens: ['ن', '##ن', '##حي']
Predictions: [5, 17, 17]
Truth: 17
Final prediction: 17 

7212 7214
Tokens: ['ن', '##ا']
Predictions: [10, 11]
Truth: 12
Final prediction: 10 

7217 7219
Tokens: ['ت', '##تضمن']
Predictions: [12, 17]
Truth: 17
Final prediction: 17 

7223 7226
Tokens: ['غ', '##راب', '##ة']
Predictions: [2, 9, 9]
Truth: 9
Final prediction: 9 

7227 7230
Tokens: ['ال', '##س', '##فير']
Predictions: [7, 13, 13]
Truth: 13
Final prediction: 13 

7235 7240
Tokens: ['ال', '##مس', '##تش', '##في', '##ات']
Predictions: [7, 13, 13, 13, 13]
Truth: 13
Final prediction: 13 

7254 7257
Tokens: ['ل', '##أ', '##عمال']
Predictions: [3, 9, 13]
Truth: 13
Final prediction: 9 

7259 7261
Tokens: ['ن', '##ي']
Predictions: [8, 5]
Truth: 12
Final prediction: 8 

7306 7308
Tokens: ['ن', '##ا']
Predictions: [12, 8]
Truth: 12
Final prediction: 8 

7309 7311
Tokens: ['ن', '##سمح']
Predictions: [5, 17]
Truth: 17
Final prediction: 17 

7312 7315
Tokens: ['ل', '##ر', '##ئاسة']
Predictions: [3, 13, 13]
Truth

Final prediction: 9 

10896 10898
Tokens: ['ي', '##ألف']
Predictions: [5, 17]
Truth: 17
Final prediction: 17 

10899 10901
Tokens: ['ال', '##بشر']
Predictions: [7, 9]
Truth: 9
Final prediction: 9 

10907 10909
Tokens: ['عامل', '##ت']
Predictions: [17, 13]
Truth: 9
Final prediction: 17 

10911 10913
Tokens: ['فيلم', '##ٍ']
Predictions: [9, 11]
Truth: 9
Final prediction: 9 

10921 10924
Tokens: ['ك', '##وت', '##ون']
Predictions: [9, 9, 13]
Truth: 13
Final prediction: 9 

10924 10927
Tokens: ['ال', '##شر', '##يرة']
Predictions: [7, 2, 2]
Truth: 2
Final prediction: 2 

10931 10933
Tokens: ['نوع', '##ٍ']
Predictions: [9, 16]
Truth: 9
Final prediction: 16 

10934 10937
Tokens: ['ال', '##رغ', '##بة']
Predictions: [7, 9, 9]
Truth: 9
Final prediction: 9 

10939 10941
Tokens: ['ال', '##قتل']
Predictions: [7, 9]
Truth: 9
Final prediction: 9 

10954 10957
Tokens: ['ال', '##قر', '##يبة']
Predictions: [7, 2, 2]
Truth: 2
Final prediction: 2 

10966 10968
Tokens: ['مهم', '##اً']
Predictions: [2, 4]
Tr

Predictions: [7, 9, 9, 9]
Truth: 9
Final prediction: 9 

14896 14899
Tokens: ['ال', '##سا', '##حل']
Predictions: [7, 9, 9]
Truth: 9
Final prediction: 9 

14899 14901
Tokens: ['مشابه', '##ٌ']
Predictions: [2, 11]
Truth: 2
Final prediction: 2 

14904 14908
Tokens: ['ال', '##ص', '##حر', '##اء']
Predictions: [13, 9, 9, 9]
Truth: 13
Final prediction: 9 

14918 14920
Tokens: ['شكل', '##ٍ']
Predictions: [9, 11]
Truth: 9
Final prediction: 9 

14937 14941
Tokens: ['ال', '##م', '##جا', '##عة']
Predictions: [7, 13, 13, 13]
Truth: 9
Final prediction: 13 

14951 14955
Tokens: ['ال', '##ح', '##ضا', '##رات']
Predictions: [7, 9, 9, 9]
Truth: 9
Final prediction: 9 

14969 14971
Tokens: ['شكل', '##ٍ']
Predictions: [9, 11]
Truth: 9
Final prediction: 9 

14986 14990
Tokens: ['ال', '##است', '##عم', '##ار']
Predictions: [7, 9, 9, 9]
Truth: 9
Final prediction: 9 

14991 14994
Tokens: ['ال', '##حر', '##وب']
Predictions: [7, 9, 9]
Truth: 9
Final prediction: 9 

14995 14998
Tokens: ['ال', '##إ', '##ضافة']
Predi

18677 18681
Tokens: ['ال', '##آ', '##لي', '##ات']
Predictions: [7, 9, 9, 9]
Truth: 9
Final prediction: 9 

18689 18692
Tokens: ['أ', '##رج', '##اء']
Predictions: [9, 9, 13]
Truth: 9
Final prediction: 9 

18717 18719
Tokens: ['جزء', '##اً']
Predictions: [9, 4]
Truth: 9
Final prediction: 9 

18719 18721
Tokens: ['واحد', '##اً']
Predictions: [10, 4]
Truth: 2
Final prediction: 10 

18726 18729
Tokens: ['ال', '##ع', '##صور']
Predictions: [7, 9, 9]
Truth: 9
Final prediction: 9 

18742 18747
Tokens: ['ال', '##م', '##ؤ', '##رخ', '##ين']
Predictions: [7, 9, 9, 9, 9]
Truth: 9
Final prediction: 9 

18751 18756
Tokens: ['ال', '##م', '##ث', '##قف', '##ين']
Predictions: [7, 9, 9, 9, 9]
Truth: 9
Final prediction: 9 

18757 18759
Tokens: ['أعمال', '##ٍ']
Predictions: [9, 11]
Truth: 9
Final prediction: 9 

18772 18777
Tokens: ['ن', '##بو', '##ء', '##ة', '##ٌ']
Predictions: [9, 9, 9, 9, 16]
Truth: 9
Final prediction: 9 

18777 18779
Tokens: ['ت', '##قول']
Predictions: [5, 17]
Truth: 17
Final prediction:

Predictions: [9, 16]
Truth: 9
Final prediction: 16 

23117 23122
Tokens: ['م', '##ت', '##أ', '##خر', '##ٍ']
Predictions: [2, 2, 2, 2, 14]
Truth: 2
Final prediction: 2 

23122 23124
Tokens: ['نوع', '##اً']
Predictions: [9, 10]
Truth: 4
Final prediction: 9 

23129 23131
Tokens: ['عدد', '##اً']
Predictions: [9, 10]
Truth: 9
Final prediction: 9 

23156 23158
Tokens: ['وقت', '##ٍ']
Predictions: [9, 16]
Truth: 9
Final prediction: 16 

23158 23163
Tokens: ['م', '##ت', '##أ', '##خر', '##ٍ']
Predictions: [2, 2, 2, 2, 14]
Truth: 2
Final prediction: 2 

23167 23171
Tokens: ['ف', '##ار', '##قا', '##ً']
Predictions: [13, 13, 13, 4]
Truth: 9
Final prediction: 13 

23171 23173
Tokens: ['كبير', '##اً']
Predictions: [2, 4]
Truth: 2
Final prediction: 2 

23175 23178
Tokens: ['ال', '##راد', '##ار']
Predictions: [13, 9, 9]
Truth: 9
Final prediction: 9 

23182 23185
Tokens: ['ي', '##شته', '##ر']
Predictions: [5, 17, 17]
Truth: 17
Final prediction: 17 

23193 23195
Tokens: ['دور', '##اً']
Predictions: [9, 4

27951 27953
Tokens: ['وفق', '##اً']
Predictions: [2, 4]
Truth: 9
Final prediction: 2 

27969 27972
Tokens: ['أ', '##عداد', '##ٍ']
Predictions: [9, 9, 11]
Truth: 9
Final prediction: 9 

27975 27978
Tokens: ['ال', '##عظم', '##ى']
Predictions: [7, 13, 13]
Truth: 2
Final prediction: 13 

27985 27989
Tokens: ['ال', '##ات', '##فاق', '##ية']
Predictions: [7, 9, 9, 9]
Truth: 9
Final prediction: 9 

27994 27996
Tokens: ['ي', '##حدد']
Predictions: [12, 17]
Truth: 17
Final prediction: 17 

27997 27999
Tokens: ['عدد', '##اً']
Predictions: [9, 12]
Truth: 9
Final prediction: 9 

28000 28003
Tokens: ['ال', '##شر', '##وط']
Predictions: [7, 9, 9]
Truth: 9
Final prediction: 9 

28011 28014
Tokens: ['ال', '##ات', '##فاق']
Predictions: [7, 9, 9]
Truth: 9
Final prediction: 9 

28015 28017
Tokens: ['حي', '##ز']
Predictions: [9, 3]
Truth: 9
Final prediction: 9 

28017 28020
Tokens: ['ال', '##تن', '##فيذ']
Predictions: [7, 9, 9]
Truth: 9
Final prediction: 9 

28051 28055
Tokens: ['ال', '##ع', '##قوب', '##ات']

Predictions: [13, 2, 13]
Truth: 2
Final prediction: 13 

32109 32112
Tokens: ['أ', '##سم', '##اك']
Predictions: [13, 9, 13]
Truth: 9
Final prediction: 13 

32119 32121
Tokens: ['مكان', '##ٍ']
Predictions: [9, 11]
Truth: 9
Final prediction: 9 

32130 32134
Tokens: ['ال', '##است', '##جم', '##ام']
Predictions: [7, 9, 9, 9]
Truth: 9
Final prediction: 9 

32135 32138
Tokens: ['ال', '##نس', '##بة']
Predictions: [7, 9, 9]
Truth: 9
Final prediction: 9 

32170 32174
Tokens: ['ال', '##تس', '##عين', '##يات']
Predictions: [7, 9, 9, 9]
Truth: 9
Final prediction: 9 

32195 32199
Tokens: ['ال', '##ست', '##ينيا', '##ت']
Predictions: [7, 9, 9, 9]
Truth: 9
Final prediction: 9 

32205 32210
Tokens: ['ان', '##خ', '##فا', '##ضا', '##ً']
Predictions: [9, 9, 9, 9, 4]
Truth: 9
Final prediction: 9 

32210 32213
Tokens: ['ح', '##اد', '##اً']
Predictions: [2, 2, 4]
Truth: 2
Final prediction: 2 

32227 32229
Tokens: ['ال', '##فوز']
Predictions: [12, 17]
Truth: 9
Final prediction: 17 

32231 32234
Tokens: ['ال', '

Final prediction: 17 

37700 37703
Tokens: ['Не', '##гов', '##ите']
Predictions: [12, 12, 11]
Truth: 7
Final prediction: 12 

37725 37727
Tokens: ['Да', '##л']
Predictions: [8, 5]
Truth: 17
Final prediction: 8 

37753 37755
Tokens: ['Все', '##ки']
Predictions: [8, 4]
Truth: 7
Final prediction: 8 

37766 37769
Tokens: ['И', '##два', '##нето']
Predictions: [17, 17, 9]
Truth: 9
Final prediction: 17 

37801 37803
Tokens: ['Как', '##во']
Predictions: [5, 12]
Truth: 7
Final prediction: 12 

37850 37852
Tokens: ['Как', '##во']
Predictions: [8, 12]
Truth: 7
Final prediction: 8 

37860 37862
Tokens: ['Т', '##и']
Predictions: [4, 5]
Truth: 12
Final prediction: 4 

37868 37872
Tokens: ['т', '##ър', '##си', '##ш']
Predictions: [17, 17, 17, 8]
Truth: 17
Final prediction: 17 

37895 37898
Tokens: ['И', '##ска', '##м']
Predictions: [12, 17, 5]
Truth: 17
Final prediction: 17 

37907 37911
Tokens: ['За', '##вид', '##ях', '##а']
Predictions: [17, 17, 17, 12]
Truth: 17
Final prediction: 17 

37943 37945


48612 48615
Tokens: ['ал', '##бан', '##ци']
Predictions: [9, 13, 9]
Truth: 9
Final prediction: 9 

48685 48688
Tokens: ['цар', '##ува', '##нето']
Predictions: [9, 17, 9]
Truth: 9
Final prediction: 9 

48720 48723
Tokens: ['об', '##явява', '##не']
Predictions: [17, 17, 9]
Truth: 9
Final prediction: 17 

48746 48750
Tokens: ['при', '##до', '##бива', '##не']
Predictions: [17, 17, 17, 9]
Truth: 9
Final prediction: 17 

48793 48796
Tokens: ['поста', '##вя', '##не']
Predictions: [17, 17, 9]
Truth: 9
Final prediction: 17 

48821 48831
Tokens: ['Р', '##аб', '##от', '##ни', '##ческо', '##-', '##мени', '##дж', '##ър', '##ските']
Predictions: [2, 2, 2, 2, 9, 9, 9, 9, 9, 2]
Truth: 2
Final prediction: 9 

48858 48862
Tokens: ['при', '##до', '##бити', '##те']
Predictions: [17, 17, 17, 12]
Truth: 2
Final prediction: 17 

48879 48883
Tokens: ['к', '##уп', '##увач', '##ите']
Predictions: [9, 17, 9, 9]
Truth: 9
Final prediction: 9 

48917 48919
Tokens: ['бит', '##ови']
Predictions: [9, 2]
Truth: 2
Final

Predictions: [10, 10, 9, 9, 9]
Truth: 9
Final prediction: 9 

60109 60114
Tokens: ['20', '##-', '##годи', '##шни', '##ната']
Predictions: [10, 10, 9, 9, 9]
Truth: 9
Final prediction: 9 

60115 60117
Tokens: ['ней', '##ната']
Predictions: [12, 11]
Truth: 7
Final prediction: 11 

60131 60133
Tokens: ['ви', '##е']
Predictions: [18, 8]
Truth: 12
Final prediction: 8 

60155 60159
Tokens: ['сан', '##д', '##ъ', '##ци']
Predictions: [9, 13, 13, 9]
Truth: 9
Final prediction: 9 

60160 60164
Tokens: ['По', '##ве', '##чет', '##о']
Predictions: [4, 4, 12, 10]
Truth: 4
Final prediction: 4 

60177 60179
Tokens: ['път', '##ния']
Predictions: [9, 2]
Truth: 2
Final prediction: 9 

60230 60232
Tokens: ['нови', '##я']
Predictions: [2, 7]
Truth: 2
Final prediction: 2 

60399 60401
Tokens: ['г', '##.']
Predictions: [18, 14]
Truth: 9
Final prediction: 18 

60444 60448
Tokens: ['об', '##яс', '##ним', '##о']
Predictions: [17, 17, 17, 2]
Truth: 2
Final prediction: 17 

60449 60452
Tokens: ['В', '##тор', '##о']

Truth: 5
Final prediction: 17 

86823 86826
Tokens: ['jau', '##rti', '##z']
Predictions: [9, 9, 4]
Truth: 17
Final prediction: 9 

86827 86829
Tokens: ['Ora', '##in']
Predictions: [9, 2]
Truth: 4
Final prediction: 9 

86833 86835
Tokens: ['dug', '##u']
Predictions: [5, 18]
Truth: 5
Final prediction: 18 

86843 86847
Tokens: ['arra', '##unk', '##erar', '##a']
Predictions: [9, 9, 2, 18]
Truth: 9
Final prediction: 9 

86867 86870
Tokens: ['Lagu', '##ntza', '##rekin']
Predictions: [9, 9, 4]
Truth: 9
Final prediction: 9 

86896 86899
Tokens: ['bat', '##zu', '##ek']
Predictions: [9, 12, 12]
Truth: 7
Final prediction: 12 

86900 86902
Tokens: ['orde', '##a']
Predictions: [9, 8]
Truth: 6
Final prediction: 8 

86924 86926
Tokens: ['hon', '##a']
Predictions: [12, 3]
Truth: 4
Final prediction: 3 

86928 86930
Tokens: ['asko', '##k']
Predictions: [9, 5]
Truth: 7
Final prediction: 9 

86935 86937
Tokens: ['He', '##men']
Predictions: [2, 9]
Truth: 4
Final prediction: 9 

86939 86942
Tokens: ['etxek'

Tokens: ['dono', '##stia', '##rrek']
Predictions: [13, 2, 2]
Truth: 9
Final prediction: 2 

91248 91251
Tokens: ['za', '##iz', '##kio']
Predictions: [17, 18, 17]
Truth: 5
Final prediction: 17 

91252 91256
Tokens: ['ja', '##be', '##goa', '##k']
Predictions: [9, 9, 9, 18]
Truth: 9
Final prediction: 9 

91263 91265
Tokens: ['ditu', '##gu']
Predictions: [17, 8]
Truth: 5
Final prediction: 8 

91268 91270
Tokens: ['luz', '##ean']
Predictions: [9, 4]
Truth: 9
Final prediction: 9 

91277 91279
Tokens: ['Santander', '##ri']
Predictions: [13, 11]
Truth: 13
Final prediction: 11 

91282 91284
Tokens: ['segundo', '##ko']
Predictions: [9, 2]
Truth: 9
Final prediction: 9 

91284 91286
Tokens: ['aldea', '##rekin']
Predictions: [9, 4]
Truth: 9
Final prediction: 9 

91291 91294
Tokens: ['luz', '##ear', '##i']
Predictions: [9, 9, 11]
Truth: 9
Final prediction: 9 

91299 91302
Tokens: ['sa', '##rri', '##tan']
Predictions: [9, 9, 4]
Truth: 4
Final prediction: 9 

91305 91308
Tokens: ['hit', '##zet', '##an

Final prediction: 17 

95151 95153
Tokens: ['dela', '##ko']
Predictions: [4, 2]
Truth: 5
Final prediction: 2 

95158 95161
Tokens: ['hil', '##keta', '##ren']
Predictions: [9, 9, 3]
Truth: 9
Final prediction: 9 

95165 95168
Tokens: ['Ni', '##regat', '##ik']
Predictions: [2, 4, 4]
Truth: 12
Final prediction: 4 

95174 95176
Tokens: ['nuk', '##e']
Predictions: [5, 8]
Truth: 5
Final prediction: 8 

95177 95182
Tokens: ['entre', '##nat', '##zai', '##lea', '##k']
Predictions: [9, 9, 9, 9, 18]
Truth: 9
Final prediction: 9 

95189 95191
Tokens: ['behar', '##ko']
Predictions: [17, 8]
Truth: 17
Final prediction: 8 

95234 95238
Tokens: ['za', '##uri', '##tuta', '##ko']
Predictions: [17, 17, 17, 2]
Truth: 17
Final prediction: 17 

95239 95242
Tokens: ['gi', '##dari', '##ari']
Predictions: [9, 9, 3]
Truth: 9
Final prediction: 9 

95245 95247
Tokens: ['nor', '##i']
Predictions: [4, 5]
Truth: 12
Final prediction: 4 

95248 95250
Tokens: ['dio', '##zu']
Predictions: [17, 12]
Truth: 5
Final predictio

Tokens: ['ona', '##rte', '##zin', '##tzat']
Predictions: [9, 9, 18, 4]
Truth: 2
Final prediction: 9 

99424 99426
Tokens: ['Bai', '##na']
Predictions: [2, 10]
Truth: 6
Final prediction: 2 

99433 99436
Tokens: ['Er', '##raz', '##a']
Predictions: [4, 2, 2]
Truth: 2
Final prediction: 2 

99437 99440
Tokens: ['be', '##giz', '##koa']
Predictions: [9, 9, 2]
Truth: 9
Final prediction: 9 

99457 99460
Tokens: ['batera', '##tzea', '##k']
Predictions: [9, 9, 12]
Truth: 17
Final prediction: 9 

99464 99467
Tokens: ['u', '##xa', '##tzea']
Predictions: [17, 17, 9]
Truth: 17
Final prediction: 17 

99484 99486
Tokens: ['zen', '##ean']
Predictions: [17, 4]
Truth: 5
Final prediction: 17 

99487 99491
Tokens: ['Jan', '##kau', '##skas', '##ek']
Predictions: [13, 13, 13, 5]
Truth: 13
Final prediction: 13 

99516 99519
Tokens: ['iga', '##nde', '##an']
Predictions: [9, 9, 4]
Truth: 9
Final prediction: 9 

99535 99538
Tokens: ['iga', '##nde', '##an']
Predictions: [2, 9, 17]
Truth: 9
Final prediction: 9 

99

Truth: 9
Final prediction: 10 

103683 103685
Tokens: ['talde', '##an']
Predictions: [9, 3]
Truth: 9
Final prediction: 9 

103694 103696
Tokens: ['finale', '##an']
Predictions: [9, 3]
Truth: 9
Final prediction: 9 

103702 103706
Tokens: ['ek', '##art', '##zea', '##n']
Predictions: [17, 17, 17, 15]
Truth: 17
Final prediction: 17 

103708 103710
Tokens: ['mia', '##tu']
Predictions: [17, 9]
Truth: 17
Final prediction: 17 

103735 103738
Tokens: ['herri', '##tar', '##ren']
Predictions: [9, 9, 12]
Truth: 9
Final prediction: 9 

103747 103752
Tokens: ['Sa', '##ski', '##bal', '##oia', '##n']
Predictions: [9, 9, 9, 9, 3]
Truth: 9
Final prediction: 9 

103752 103755
Tokens: ['se', '##kula', '##ko']
Predictions: [9, 9, 2]
Truth: 2
Final prediction: 9 

103764 103766
Tokens: ['lana', '##ri']
Predictions: [9, 11]
Truth: 9
Final prediction: 9 

103785 103792
Tokens: ['pre', '##bent', '##zio', '##-', '##neur', '##rir', '##ako']
Predictions: [9, 9, 9, 9, 9, 2, 2]
Truth: 9
Final prediction: 9 

103799

107931 107933
Tokens: ['urte', '##tik']
Predictions: [9, 3]
Truth: 9
Final prediction: 9 

107933 107936
Tokens: ['gora', '##ko', '##en']
Predictions: [2, 2, 12]
Truth: 3
Final prediction: 2 

107969 107974
Tokens: ['%', '##0', '##,', '##3', '##tik']
Predictions: [10, 10, 10, 10, 3]
Truth: 10
Final prediction: 10 

107984 107987
Tokens: ['au', '##kera', '##rik']
Predictions: [9, 9, 2]
Truth: 9
Final prediction: 9 

107997 108000
Tokens: ['era', '##so', '##an']
Predictions: [9, 9, 17]
Truth: 9
Final prediction: 9 

108000 108002
Tokens: ['nahi', '##ko']
Predictions: [4, 2]
Truth: 4
Final prediction: 2 

108015 108018
Tokens: ['Sa', '##eze', '##k']
Predictions: [13, 13, 12]
Truth: 13
Final prediction: 13 

108019 108022
Tokens: ['ep', '##aile', '##ari']
Predictions: [9, 9, 17]
Truth: 9
Final prediction: 9 

108028 108030
Tokens: ['izan', '##aren']
Predictions: [2, 12]
Truth: 17
Final prediction: 2 

108038 108040
Tokens: ['Argentina', '##n']
Predictions: [13, 3]
Truth: 13
Final predictio

Predictions: [2, 9, 4]
Truth: 9
Final prediction: 9 

112074 112076
Tokens: ['urte', '##etako']
Predictions: [9, 2]
Truth: 9
Final prediction: 9 

112081 112084
Tokens: ['elk', '##arre', '##kin']
Predictions: [12, 12, 4]
Truth: 12
Final prediction: 12 

112089 112091
Tokens: ['Tau', '##k']
Predictions: [13, 11]
Truth: 13
Final prediction: 11 

112091 112093
Tokens: ['Kinder', '##ren']
Predictions: [13, 11]
Truth: 13
Final prediction: 11 

112106 112108
Tokens: ['hiru', '##rak']
Predictions: [10, 9]
Truth: 10
Final prediction: 9 

112110 112113
Tokens: ['Osa', '##suna', '##ko']
Predictions: [13, 13, 11]
Truth: 13
Final prediction: 13 

112117 112119
Tokens: ['Fernandez', '##ek']
Predictions: [13, 12]
Truth: 13
Final prediction: 12 

112122 112125
Tokens: ['iga', '##nde', '##an']
Predictions: [9, 9, 18]
Truth: 9
Final prediction: 9 

112129 112131
Tokens: ['dire', '##la']
Predictions: [17, 12]
Truth: 5
Final prediction: 17 

112139 112142
Tokens: ['ja', '##rt', '##zea']
Predictions: [17,

Final prediction: 10 

116178 116181
Tokens: ['flota', '##rent', '##zat']
Predictions: [9, 18, 18]
Truth: 9
Final prediction: 18 

116186 116191
Tokens: ['ba', '##xu', '##rako', '##aren', '##tzat']
Predictions: [2, 2, 2, 18, 9]
Truth: 9
Final prediction: 2 

116219 116222
Tokens: ['E', '##B', '##ren']
Predictions: [9, 9, 2]
Truth: 13
Final prediction: 9 

116229 116231
Tokens: ['konpontze', '##ra']
Predictions: [2, 4]
Truth: 17
Final prediction: 2 

116239 116241
Tokens: ['ekonomi', '##en']
Predictions: [9, 2]
Truth: 9
Final prediction: 9 

116242 116247
Tokens: ['ego', '##kit', '##za', '##pena', '##k']
Predictions: [9, 9, 9, 9, 18]
Truth: 9
Final prediction: 9 

116247 116250
Tokens: ['lor', '##tzer', '##a']
Predictions: [17, 17, 4]
Truth: 17
Final prediction: 17 

116258 116262
Tokens: ['A', '##gir', '##iare', '##kin']
Predictions: [13, 13, 17, 4]
Truth: 9
Final prediction: 13 

116263 116266
Tokens: ['ja', '##rrer', '##a']
Predictions: [9, 9, 18]
Truth: 9
Final prediction: 9 

11633

120609 120612
Tokens: ['model', '##atz', '##eko']
Predictions: [17, 17, 7]
Truth: 17
Final prediction: 17 

120621 120623
Tokens: ['bere', '##zia']
Predictions: [2, 9]
Truth: 2
Final prediction: 9 

120632 120635
Tokens: ['duena', '##rent', '##zat']
Predictions: [12, 15, 15]
Truth: 5
Final prediction: 15 

120636 120639
Tokens: ['Parti', '##dare', '##n']
Predictions: [9, 9, 2]
Truth: 9
Final prediction: 9 

120639 120642
Tokens: ['hasta', '##pene', '##an']
Predictions: [9, 9, 4]
Truth: 9
Final prediction: 9 

120647 120649
Tokens: ['euskal', '##dun']
Predictions: [2, 9]
Truth: 2
Final prediction: 9 

120649 120652
Tokens: ['bat', '##zu', '##ek']
Predictions: [9, 9, 10]
Truth: 7
Final prediction: 9 

120664 120667
Tokens: ['biz', '##kar', '##rean']
Predictions: [9, 9, 4]
Truth: 9
Final prediction: 9 

120669 120671
Tokens: ['hai', '##ekin']
Predictions: [12, 3]
Truth: 7
Final prediction: 3 

120678 120680
Tokens: ['zela', '##ian']
Predictions: [9, 2]
Truth: 9
Final prediction: 9 

12068

124438 124440
Tokens: ['Ai', '##ta']
Predictions: [8, 18]
Truth: 9
Final prediction: 8 

124445 124447
Tokens: ['eskola', '##ko']
Predictions: [9, 11]
Truth: 9
Final prediction: 9 

124447 124449
Tokens: ['at', '##ean']
Predictions: [9, 18]
Truth: 9
Final prediction: 9 

124479 124482
Tokens: ['egon', '##kor', '##rei']
Predictions: [2, 2, 17]
Truth: 2
Final prediction: 2 

124483 124487
Tokens: ['e', '##baki', '##dura', '##ri']
Predictions: [9, 9, 9, 3]
Truth: 9
Final prediction: 9 

124488 124492
Tokens: ['as', '##etas', '##una', '##ri']
Predictions: [9, 9, 9, 3]
Truth: 9
Final prediction: 9 

124493 124496
Tokens: ['dago', '##zki', '##e']
Predictions: [17, 17, 8]
Truth: 17
Final prediction: 17 

124508 124511
Tokens: ['hiri', '##burua', '##n']
Predictions: [9, 9, 3]
Truth: 9
Final prediction: 9 

124511 124514
Tokens: ['ok', '##erre', '##ra']
Predictions: [2, 2, 4]
Truth: 2
Final prediction: 2 

124525 124528
Tokens: ['independent', '##zia', '##ri']
Predictions: [9, 9, 3]
Truth: 9
Fi

Truth: 13
Final prediction: 13 

128561 128563
Tokens: ['talde', '##k']
Predictions: [9, 12]
Truth: 9
Final prediction: 9 

128571 128574
Tokens: ['honen', '##gati', '##k']
Predictions: [12, 5, 4]
Truth: 7
Final prediction: 4 

128575 128577
Tokens: ['Ho', '##rren']
Predictions: [2, 10]
Truth: 7
Final prediction: 2 

128579 128582
Tokens: ['mart', '##xo', '##aren']
Predictions: [13, 13, 3]
Truth: 9
Final prediction: 13 

128582 128584
Tokens: ['8', '##ko']
Predictions: [10, 3]
Truth: 10
Final prediction: 10 

128588 128591
Tokens: ['De', '##kret', '##uaren']
Predictions: [9, 9, 18]
Truth: 9
Final prediction: 9 

128594 128603
Tokens: ['ih', '##ard', '##un', '##-', '##anto', '##laki', '##dete', '##gia', '##k']
Predictions: [9, 9, 9, 9, 9, 9, 9, 9, 18]
Truth: 9
Final prediction: 9 

128603 128607
Tokens: ['ih', '##ard', '##unea', '##n']
Predictions: [9, 9, 9, 3]
Truth: 2
Final prediction: 9 

128612 128615
Tokens: ['zi', '##bile', '##rako']
Predictions: [2, 9, 2]
Truth: 2
Final predictio


133488 133491
Tokens: ['so', '##vitt', '##u']
Predictions: [17, 17, 2]
Truth: 17
Final prediction: 17 

133499 133509
Tokens: ['HF', '##C', '##-', '##kas', '##vih', '##uo', '##nek', '##aa', '##su', '##ja']
Predictions: [13, 13, 13, 13, 13, 13, 9, 13, 13, 13]
Truth: 9
Final prediction: 13 

133515 133522
Tokens: ['ym', '##p', '##äri', '##stö', '##oh', '##jel', '##man']
Predictions: [13, 13, 13, 13, 9, 9, 13]
Truth: 9
Final prediction: 13 

133535 133537
Tokens: ['t', '##ällä']
Predictions: [12, 7]
Truth: 12
Final prediction: 12 

133544 133547
Tokens: ['%', '##:', '##lla']
Predictions: [16, 4, 4]
Truth: 16
Final prediction: 4 

133548 133551
Tokens: ['m', '##rd', '##.']
Predictions: [10, 10, 18]
Truth: 10
Final prediction: 10 

133575 133577
Tokens: ['het', '##kellä']
Predictions: [9, 4]
Truth: 9
Final prediction: 9 

133605 133608
Tokens: ['tun', '##tum', '##assa']
Predictions: [9, 9, 4]
Truth: 9
Final prediction: 9 

133625 133627
Tokens: ['nii', '##lle']
Predictions: [12, 2]
Truth: 

140768 140772
Tokens: ['kes', '##tä', '##nee', '##stä']
Predictions: [17, 17, 17, 9]
Truth: 17
Final prediction: 17 

140786 140789
Tokens: ['VW', '##:', '##n']
Predictions: [13, 13, 11]
Truth: 9
Final prediction: 13 

140804 140809
Tokens: ['ty', '##öp', '##aik', '##kan', '##sa']
Predictions: [9, 9, 9, 9, 12]
Truth: 9
Final prediction: 9 

140817 140820
Tokens: ['VW', '##:', '##lle']
Predictions: [13, 13, 17]
Truth: 9
Final prediction: 13 

140840 140843
Tokens: ['vara', '##utu', '##maan']
Predictions: [17, 17, 3]
Truth: 17
Final prediction: 17 

140843 140846
Tokens: ['ku', '##lui', '##hin']
Predictions: [9, 9, 3]
Truth: 9
Final prediction: 9 

140850 140852
Tokens: ['euro', '##lla']
Predictions: [13, 4]
Truth: 9
Final prediction: 4 

140858 140861
Tokens: ['kes', '##ä', '##kuussa']
Predictions: [13, 13, 4]
Truth: 9
Final prediction: 13 

140866 140870
Tokens: ['Winter', '##kor', '##nii', '##n']
Predictions: [13, 13, 13, 3]
Truth: 13
Final prediction: 13 

140881 140884
Tokens: ['t',

149247 149250
Tokens: ['so', '##das', '##ta']
Predictions: [9, 9, 3]
Truth: 9
Final prediction: 9 

149251 149253
Tokens: ['Ol', '##i']
Predictions: [12, 5]
Truth: 5
Final prediction: 12 

149312 149315
Tokens: ['Us', '##kota', '##an']
Predictions: [17, 17, 5]
Truth: 17
Final prediction: 17 

149338 149342
Tokens: ['tai', '##do', '##illa', '##an']
Predictions: [9, 9, 9, 4]
Truth: 9
Final prediction: 9 

149343 149347
Tokens: ['ky', '##vy', '##illä', '##än']
Predictions: [2, 2, 2, 4]
Truth: 9
Final prediction: 2 

149389 149392
Tokens: ['kana', '##ali', '##in']
Predictions: [9, 9, 13]
Truth: 9
Final prediction: 9 

149465 149468
Tokens: ['kita', '##anie', '##n']
Predictions: [2, 9, 3]
Truth: 9
Final prediction: 9 

149470 149474
Tokens: ['-', '##dyn', '##asti', '##aan']
Predictions: [13, 9, 13, 13]
Truth: 9
Final prediction: 13 

149498 149501
Tokens: ['ra', '##joj', '##aan']
Predictions: [9, 9, 3]
Truth: 9
Final prediction: 9 

149572 149575
Tokens: ['ko', '##ulu', '##kuntaan']
Predict

Predictions: [13, 11]
Truth: 13
Final prediction: 11 

157166 157170
Tokens: ['Su', '##ur', '##valla', '##t']
Predictions: [9, 9, 13, 13]
Truth: 9
Final prediction: 9 

157219 157222
Tokens: ['per', '##usta', '##misesta']
Predictions: [17, 17, 15]
Truth: 9
Final prediction: 17 

157237 157240
Tokens: ['os', '##mane', '##ihin']
Predictions: [2, 2, 9]
Truth: 9
Final prediction: 2 

157254 157257
Tokens: ['selv', '##ä', '##ksi']
Predictions: [2, 2, 4]
Truth: 2
Final prediction: 2 

157277 157279
Tokens: ['Albania', '##an']
Predictions: [13, 3]
Truth: 13
Final prediction: 3 

157304 157309
Tokens: ['alle', '##kir', '##jo', '##itetu', '##ssa']
Predictions: [17, 17, 17, 17, 2]
Truth: 17
Final prediction: 17 

157333 157338
Tokens: ['v', '##äl', '##itt', '##ämä', '##ttä']
Predictions: [17, 17, 17, 2, 2]
Truth: 17
Final prediction: 17 

157377 157379
Tokens: ['kes', '##ken']
Predictions: [4, 3]
Truth: 3
Final prediction: 3 

157433 157435
Tokens: ['jo', '##ihin']
Predictions: [12, 3]
Truth: 12

Predictions: [17, 17, 17, 12]
Truth: 9
Final prediction: 17 

165144 165148
Tokens: ['la', '##aje', '##nta', '##misesta']
Predictions: [17, 17, 17, 9]
Truth: 9
Final prediction: 17 

165154 165158
Tokens: ['nimi', '##ttä', '##mises', '##tä']
Predictions: [17, 17, 9, 15]
Truth: 9
Final prediction: 17 

165158 165161
Tokens: ['arme', '##ija', '##lle']
Predictions: [9, 9, 3]
Truth: 9
Final prediction: 9 

165167 165172
Tokens: ['levy', '##-', '##y', '##htiö', '##lleen']
Predictions: [9, 9, 9, 9, 12]
Truth: 9
Final prediction: 9 

165176 165178
Tokens: ['Company', '##lle']
Predictions: [13, 3]
Truth: 13
Final prediction: 3 

165209 165212
Tokens: ['Don', "##'", '##t']
Predictions: [13, 11, 11]
Truth: 13
Final prediction: 11 

165258 165260
Tokens: ['puolella', '##an']
Predictions: [9, 4]
Truth: 9
Final prediction: 9 

165296 165299
Tokens: ['pai', '##kkan', '##sa']
Predictions: [9, 9, 12]
Truth: 9
Final prediction: 9 

165299 165302
Tokens: ['tribu', '##unis', '##sa']
Predictions: [9, 9, 1

169073 169076
Tokens: ['ק', '##ל', '##ע']
Predictions: [13, 17, 17]
Truth: 17
Final prediction: 17 

169083 169087
Tokens: ['ו', '##רוב', '##רט', '##ס']
Predictions: [6, 13, 13, 13]
Truth: 1
Final prediction: 13 

169102 169104
Tokens: ['מה', '##סל']
Predictions: [3, 9]
Truth: 1
Final prediction: 9 

169112 169115
Tokens: ['ה', '##ע', '##מק']
Predictions: [13, 9, 13]
Truth: 1
Final prediction: 13 

169118 169120
Tokens: ['ה', '##אחרים']
Predictions: [7, 2]
Truth: 1
Final prediction: 2 

169137 169141
Tokens: ['מ', '##לה', '##פת', '##יע']
Predictions: [3, 11, 17, 17]
Truth: 1
Final prediction: 17 

169142 169145
Tokens: ['לה', '##פת', '##יע']
Predictions: [11, 17, 17]
Truth: 17
Final prediction: 17 

169146 169150
Tokens: ['ח', '##ני', '##כי', '##ו']
Predictions: [13, 13, 13, 12]
Truth: 1
Final prediction: 13 

169175 169178
Tokens: ['מי', '##כול', '##תם']
Predictions: [3, 9, 12]
Truth: 1
Final prediction: 9 

169179 169181
Tokens: ['יכולת', '##_']
Predictions: [9, 0]
Truth: 9
Final pre

Truth: 1
Final prediction: 9 

172490 172493
Tokens: ['ש', '##נוע', '##דה']
Predictions: [12, 17, 17]
Truth: 1
Final prediction: 17 

172494 172497
Tokens: ['נ', '##ועד', '##ה']
Predictions: [5, 17, 5]
Truth: 17
Final prediction: 5 

172497 172499
Tokens: ['ל', '##משפחת']
Predictions: [3, 13]
Truth: 1
Final prediction: 3 

172513 172516
Tokens: ['ה', '##דיר', '##ות']
Predictions: [2, 9, 9]
Truth: 1
Final prediction: 9 

172519 172523
Tokens: ['י', '##יש', '##אר', '##ו']
Predictions: [5, 17, 17, 17]
Truth: 17
Final prediction: 17 

172534 172537
Tokens: ['ל', '##ת', '##שלום']
Predictions: [3, 9, 9]
Truth: 1
Final prediction: 9 

172540 172543
Tokens: ['י', '##תר', '##ת']
Predictions: [9, 2, 9]
Truth: 9
Final prediction: 9 

172543 172545
Tokens: ['ה', '##מס']
Predictions: [7, 9]
Truth: 1
Final prediction: 9 

172550 172552
Tokens: ['נ', '##קבע']
Predictions: [5, 17]
Truth: 17
Final prediction: 17 

172556 172559
Tokens: ['ה', '##ע', '##סקה']
Predictions: [7, 9, 9]
Truth: 1
Final predict

175484 175488
Tokens: ['ל', '##גי', '##טר', '##ה']
Predictions: [3, 9, 9, 9]
Truth: 1
Final prediction: 9 

175492 175496
Tokens: ['ו', '##לר', '##ביע', '##יית']
Predictions: [6, 4, 9, 9]
Truth: 1
Final prediction: 9 

175507 175510
Tokens: ['ב', '##צד', '##ק']
Predictions: [3, 9, 9]
Truth: 1
Final prediction: 9 

175515 175518
Tokens: ['יו', '##צר', '##ן']
Predictions: [13, 9, 13]
Truth: 1
Final prediction: 13 

175518 175521
Tokens: ['יו', '##צר', '##_']
Predictions: [13, 13, 0]
Truth: 9
Final prediction: 13 

175521 175524
Tokens: ['_', '##של', '##_']
Predictions: [8, 8, 0]
Truth: 3
Final prediction: 8 

175531 175534
Tokens: ['כ', '##קל', '##יל']
Predictions: [3, 13, 13]
Truth: 1
Final prediction: 13 

175538 175542
Tokens: ['ו', '##נוס', '##טל', '##גי']
Predictions: [6, 2, 2, 2]
Truth: 1
Final prediction: 2 

175556 175560
Tokens: ['כ', '##תו', '##לד', '##ה']
Predictions: [3, 17, 17, 2]
Truth: 1
Final prediction: 17 

175568 175570
Tokens: ['ב', '##_']
Predictions: [3, 8]
Truth: 3

179581 179585
Tokens: ['ה', '##ס', '##גו', '##לי']
Predictions: [7, 2, 2, 2]
Truth: 1
Final prediction: 2 

179595 179598
Tokens: ['ה', '##מחוז', '##יים']
Predictions: [7, 13, 13]
Truth: 1
Final prediction: 13 

179603 179605
Tokens: ['ה', '##_']
Predictions: [13, 0]
Truth: 7
Final prediction: 0 

179609 179611
Tokens: ['משפט', '##ית']
Predictions: [2, 13]
Truth: 2
Final prediction: 2 

179623 179625
Tokens: ['ו', '##יש']
Predictions: [6, 17]
Truth: 1
Final prediction: 17 

179629 179633
Tokens: ['ה', '##טו', '##ע', '##נים']
Predictions: [12, 17, 17, 17]
Truth: 1
Final prediction: 17 

179639 179642
Tokens: ['ה', '##תו', '##צאה']
Predictions: [7, 9, 9]
Truth: 1
Final prediction: 9 

179643 179645
Tokens: ['ת', '##וצאה']
Predictions: [2, 9]
Truth: 9
Final prediction: 9 

179647 179649
Tokens: ['יחיד', '##ה']
Predictions: [2, 9]
Truth: 2
Final prediction: 9 

179649 179651
Tokens: ['ה', '##יתה']
Predictions: [5, 17]
Truth: 5
Final prediction: 17 

179654 179657
Tokens: ['ה', '##עו', '##מ

Tokens: ['עמ', '##ודי']
Predictions: [9, 2]
Truth: 9
Final prediction: 9 

182891 182894
Tokens: ['ב', '##ד', '##פוס']
Predictions: [3, 9, 9]
Truth: 1
Final prediction: 9 

182900 182903
Tokens: ['ו', '##ע', '##נייני']
Predictions: [6, 2, 2]
Truth: 1
Final prediction: 2 

182909 182911
Tokens: ['ה', '##_']
Predictions: [7, 0]
Truth: 7
Final prediction: 0 

182923 182927
Tokens: ['ה', '##מ', '##פרט', '##ת']
Predictions: [12, 17, 17, 17]
Truth: 1
Final prediction: 17 

182935 182937
Tokens: ['חי', '##יהם']
Predictions: [9, 12]
Truth: 1
Final prediction: 9 

182937 182939
Tokens: ['חיים', '##_']
Predictions: [9, 0]
Truth: 9
Final prediction: 0 

182939 182942
Tokens: ['_', '##של', '##_']
Predictions: [8, 10, 0]
Truth: 3
Final prediction: 8 

182942 182944
Tokens: ['_', '##הם']
Predictions: [14, 12]
Truth: 12
Final prediction: 12 

182944 182949
Tokens: ['ו', '##הי', '##ש', '##גיה', '##ם']
Predictions: [6, 9, 9, 9, 12]
Truth: 1
Final prediction: 9 

182950 182954
Tokens: ['ה', '##יש', '##ג

Tokens: ['ל', '##דור', '##ות', '##יה']
Predictions: [3, 9, 9, 12]
Truth: 1
Final prediction: 9 

185916 185919
Tokens: ['דו', '##ר', '##_']
Predictions: [13, 13, 0]
Truth: 9
Final prediction: 13 

185919 185922
Tokens: ['_', '##של', '##_']
Predictions: [13, 13, 0]
Truth: 3
Final prediction: 13 

185940 185944
Tokens: ['ו', '##ה', '##קי', '##ום']
Predictions: [6, 7, 9, 9]
Truth: 1
Final prediction: 9 

185951 185955
Tokens: ['כ', '##מי', '##עו', '##ט']
Predictions: [3, 9, 9, 9]
Truth: 1
Final prediction: 9 

185970 185973
Tokens: ['וג', '##יב', '##ושה']
Predictions: [6, 9, 13]
Truth: 1
Final prediction: 9 

185974 185978
Tokens: ['ג', '##יב', '##וש', '##_']
Predictions: [9, 9, 9, 0]
Truth: 9
Final prediction: 9 

185978 185981
Tokens: ['_', '##של', '##_']
Predictions: [8, 8, 0]
Truth: 3
Final prediction: 8 

185998 186001
Tokens: ['אי', '##מו', '##ץ']
Predictions: [17, 17, 13]
Truth: 9
Final prediction: 17 

186011 186015
Tokens: ['מ', '##א', '##פי', '##ינים']
Predictions: [2, 9, 9, 9]


189698 189701
Tokens: ['ר', '##ות', '##חים']
Predictions: [17, 17, 2]
Truth: 2
Final prediction: 17 

189709 189714
Tokens: ['ר', '##כר', '##ו', '##כי', '##ות']
Predictions: [2, 2, 2, 2, 9]
Truth: 2
Final prediction: 2 

189720 189724
Tokens: ['ה', '##ג', '##בי', '##נה']
Predictions: [7, 9, 9, 9]
Truth: 1
Final prediction: 9 

189736 189739
Tokens: ['_', '##הו', '##א']
Predictions: [9, 9, 2]
Truth: 12
Final prediction: 9 

189739 189742
Tokens: ['ה', '##ט', '##עם']
Predictions: [7, 9, 9]
Truth: 1
Final prediction: 9 

189752 189756
Tokens: ['ו', '##ה', '##ברו', '##ר']
Predictions: [6, 2, 2, 2]
Truth: 1
Final prediction: 2 

189775 189778
Tokens: ['ה', '##נח', '##שבת']
Predictions: [12, 17, 17]
Truth: 1
Final prediction: 17 

189781 189784
Tokens: ['ב', '##עי', '##ני']
Predictions: [3, 9, 13]
Truth: 1
Final prediction: 9 

189791 189793
Tokens: ['ה', '##_']
Predictions: [13, 0]
Truth: 7
Final prediction: 0 

189800 189802
Tokens: ['ב', '##איטליה']
Predictions: [3, 13]
Truth: 1
Final pre

193524 193527
Tokens: ['לא', '##יט', '##ליה']
Predictions: [3, 13, 13]
Truth: 1
Final prediction: 13 

193551 193553
Tokens: ['ו', '##קפה']
Predictions: [6, 9]
Truth: 1
Final prediction: 9 

193556 193558
Tokens: ['ל', '##סיום']
Predictions: [3, 9]
Truth: 1
Final prediction: 9 

193564 193568
Tokens: ['ה', '##מ', '##בוסס', '##ת']
Predictions: [12, 17, 17, 17]
Truth: 1
Final prediction: 17 

193573 193575
Tokens: ['ה', '##מנות']
Predictions: [7, 9]
Truth: 1
Final prediction: 9 

193579 193582
Tokens: ['ה', '##מח', '##יר']
Predictions: [7, 9, 9]
Truth: 1
Final prediction: 9 

193600 193602
Tokens: ['ב', '##סך']
Predictions: [3, 9]
Truth: 1
Final prediction: 9 

193615 193618
Tokens: ['בה', '##ת', '##חשב']
Predictions: [15, 17, 17]
Truth: 3
Final prediction: 17 

193618 193621
Tokens: ['ב', '##מח', '##יר']
Predictions: [3, 9, 9]
Truth: 1
Final prediction: 9 

193622 193624
Tokens: ['ה', '##_']
Predictions: [9, 0]
Truth: 7
Final prediction: 0 

193636 193639
Tokens: ['ה', '##בע', '##לים']


208895 208898
Tokens: ['mini', '##star', '##stvu']
Predictions: [9, 9, 13]
Truth: 9
Final prediction: 9 

208913 208915
Tokens: ['2009', '##.']
Predictions: [10, 14]
Truth: 2
Final prediction: 10 

209103 209107
Tokens: ['ori', '##jent', '##iran', '##e']
Predictions: [17, 17, 17, 2]
Truth: 2
Final prediction: 17 

209185 209187
Tokens: ['treba', '##la']
Predictions: [17, 5]
Truth: 17
Final prediction: 17 

209274 209278
Tokens: ['pot', '##ak', '##nul', '##a']
Predictions: [17, 17, 17, 12]
Truth: 17
Final prediction: 17 

209299 209301
Tokens: ['2006', '##.']
Predictions: [10, 14]
Truth: 2
Final prediction: 10 

209302 209304
Tokens: ['2008', '##.']
Predictions: [10, 14]
Truth: 2
Final prediction: 10 

209356 209358
Tokens: ['moral', '##o']
Predictions: [17, 5]
Truth: 17
Final prediction: 17 

209393 209396
Tokens: ['ot', '##vara', '##nju']
Predictions: [17, 17, 9]
Truth: 9
Final prediction: 17 

209436 209438
Tokens: ['ot', '##varanje']
Predictions: [17, 9]
Truth: 9
Final prediction: 1

229395 229397
Tokens: ['na', '##šem']
Predictions: [12, 2]
Truth: 7
Final prediction: 2 

229397 229401
Tokens: ['še', '##sna', '##ester', '##cu']
Predictions: [13, 13, 9, 9]
Truth: 9
Final prediction: 9 

229565 229567
Tokens: ['bis', '##mo']
Predictions: [5, 12]
Truth: 5
Final prediction: 12 

229634 229636
Tokens: ['1997', '##.']
Predictions: [10, 14]
Truth: 2
Final prediction: 10 

229644 229646
Tokens: ['taka', '##v']
Predictions: [12, 2]
Truth: 7
Final prediction: 2 

229660 229662
Tokens: ['1998', '##.']
Predictions: [10, 0]
Truth: 2
Final prediction: 0 

229664 229666
Tokens: ['Š', '##to']
Predictions: [8, 12]
Truth: 12
Final prediction: 8 

229688 229690
Tokens: ['vraća', '##njem']
Predictions: [17, 9]
Truth: 9
Final prediction: 17 

229864 229866
Tokens: ['kup', '##nju']
Predictions: [17, 9]
Truth: 9
Final prediction: 17 

229928 229930
Tokens: ['Sv', '##i']
Predictions: [8, 5]
Truth: 2
Final prediction: 8 

229935 229939
Tokens: ['iz', '##raža', '##vam', '##o']
Predictions: 

239388 239391
Tokens: ['向', '##か', '##っ']
Predictions: [17, 8, 8]
Truth: 17
Final prediction: 8 

239408 239410
Tokens: ['最', '##近']
Predictions: [4, 2]
Truth: 9
Final prediction: 2 

239429 239431
Tokens: ['で', '##す']
Predictions: [3, 8]
Truth: 5
Final prediction: 8 

239433 239439
Tokens: ['国', '##連', '##環', '##境', '##計', '##画']
Predictions: [13, 13, 0, 13, 13, 0]
Truth: 9
Final prediction: 13 

239454 239456
Tokens: ['述', '##べ']
Predictions: [17, 8]
Truth: 17
Final prediction: 8 

239472 239474
Tokens: ['上', '##昇']
Predictions: [17, 0]
Truth: 17
Final prediction: 0 

239484 239487
Tokens: ['パリ', '##協', '##定']
Predictions: [9, 0, 9]
Truth: 9
Final prediction: 9 

239501 239504
Tokens: ['現', '##時', '##点']
Predictions: [2, 9, 9]
Truth: 9
Final prediction: 9 

239527 239529
Tokens: ['6', '##月']
Predictions: [10, 0]
Truth: 9
Final prediction: 0 

239537 239539
Tokens: ['海', '##岸']
Predictions: [9, 0]
Truth: 9
Final prediction: 0 

239545 239547
Tokens: ['実', '##施']
Predictions: [9, 0]
Tr

244375 244377
Tokens: ['語', '##源']
Predictions: [9, 0]
Truth: 9
Final prediction: 0 

244396 244398
Tokens: ['自', '##身']
Predictions: [12, 13]
Truth: 9
Final prediction: 12 

244447 244449
Tokens: ['展', '##示']
Predictions: [17, 13]
Truth: 17
Final prediction: 17 

244461 244463
Tokens: ['軍', '##事']
Predictions: [9, 0]
Truth: 9
Final prediction: 0 

244485 244488
Tokens: ['選', '##挙', '##区']
Predictions: [9, 9, 0]
Truth: 9
Final prediction: 9 

244501 244503
Tokens: ['そ', '##こ']
Predictions: [12, 4]
Truth: 12
Final prediction: 12 

244505 244508
Tokens: ['有', '##権', '##者']
Predictions: [2, 13, 13]
Truth: 9
Final prediction: 13 

244522 244525
Tokens: ['納', '##税', '##者']
Predictions: [14, 0, 0]
Truth: 9
Final prediction: 0 

244544 244547
Tokens: ['痛', '##ま', '##しく']
Predictions: [2, 2, 4]
Truth: 2
Final prediction: 2 

244609 244614
Tokens: ['自', '##由', '##民', '##主', '##党']
Predictions: [13, 13, 13, 13, 0]
Truth: 13
Final prediction: 13 

244636 244638
Tokens: ['た', '##い']
Predictions: [

Predictions: [9, 8, 8, 8]
Truth: 2
Final prediction: 8 

250885 250887
Tokens: ['こ', '##と']
Predictions: [8, 9]
Truth: 15
Final prediction: 8 

250888 250891
Tokens: ['始', '##ま', '##る']
Predictions: [17, 17, 8]
Truth: 17
Final prediction: 17 

250968 250970
Tokens: ['光', '##り']
Predictions: [2, 17]
Truth: 17
Final prediction: 17 

251027 251029
Tokens: ['の', '##で']
Predictions: [8, 6]
Truth: 15
Final prediction: 8 

251041 251043
Tokens: ['完', '##全']
Predictions: [2, 4]
Truth: 2
Final prediction: 2 

251082 251086
Tokens: ['起', '##ち', '##上', '##げ']
Predictions: [17, 18, 18, 18]
Truth: 17
Final prediction: 18 

251098 251100
Tokens: ['当', '##社']
Predictions: [13, 12]
Truth: 9
Final prediction: 12 

251134 251137
Tokens: ['主', '##た', '##る']
Predictions: [9, 8, 8]
Truth: 2
Final prediction: 8 

251137 251140
Tokens: ['推', '##進', '##力']
Predictions: [9, 9, 0]
Truth: 9
Final prediction: 9 

251172 251175
Tokens: ['見', '##な', '##す']
Predictions: [17, 17, 8]
Truth: 17
Final prediction: 17 

2

Final prediction: 9 

257828 257832
Tokens: ['公', '##共', '##空', '##間']
Predictions: [9, 9, 0, 9]
Truth: 9
Final prediction: 9 

257891 257895
Tokens: ['中', '##華', '##民', '##国']
Predictions: [13, 13, 0, 13]
Truth: 13
Final prediction: 13 

257916 257920
Tokens: ['中', '##華', '##民', '##国']
Predictions: [8, 14, 14, 13]
Truth: 13
Final prediction: 14 

257924 257927
Tokens: ['で', '##あ', '##っ']
Predictions: [3, 8, 8]
Truth: 5
Final prediction: 8 

257943 257946
Tokens: ['後', '##継', '##者']
Predictions: [9, 14, 14]
Truth: 9
Final prediction: 14 

257953 257956
Tokens: ['果', '##た', '##す']
Predictions: [17, 17, 14]
Truth: 17
Final prediction: 17 

257961 257963
Tokens: ['清', '##朝']
Predictions: [8, 0]
Truth: 9
Final prediction: 8 

257990 257992
Tokens: ['没', '##落']
Predictions: [17, 0]
Truth: 17
Final prediction: 0 

258005 258007
Tokens: ['移', '##行']
Predictions: [17, 0]
Truth: 17
Final prediction: 0 

258028 258030
Tokens: ['語', '##っ']
Predictions: [17, 14]
Truth: 17
Final prediction: 17 

25

264339 264341
Tokens: ['気', '##温']
Predictions: [9, 0]
Truth: 9
Final prediction: 0 

264347 264350
Tokens: ['で', '##あ', '##ろ']
Predictions: [6, 8, 17]
Truth: 5
Final prediction: 8 

264356 264358
Tokens: ['年', '##間']
Predictions: [9, 0]
Truth: 9
Final prediction: 0 

264364 264366
Tokens: ['健', '##康']
Predictions: [9, 13]
Truth: 9
Final prediction: 9 

264462 264464
Tokens: ['最', '##高']
Predictions: [4, 2]
Truth: 9
Final prediction: 2 

264471 264473
Tokens: ['人', '##材']
Predictions: [9, 0]
Truth: 9
Final prediction: 0 

264481 264483
Tokens: ['好', '##ん']
Predictions: [8, 14]
Truth: 17
Final prediction: 8 

264504 264507
Tokens: ['有', '##名', '##人']
Predictions: [2, 2, 9]
Truth: 9
Final prediction: 2 

264510 264512
Tokens: ['活', '##動']
Predictions: [17, 0]
Truth: 17
Final prediction: 0 

264572 264574
Tokens: ['訪', '##問']
Predictions: [17, 18]
Truth: 17
Final prediction: 17 

264600 264602
Tokens: ['紹', '##介']
Predictions: [17, 9]
Truth: 17
Final prediction: 17 

264610 264612
Tokens:

270946 270948
Tokens: ['選', '##出']
Predictions: [17, 0]
Truth: 17
Final prediction: 0 

270958 270960
Tokens: ['年', '##間']
Predictions: [9, 0]
Truth: 9
Final prediction: 0 

270972 270974
Tokens: ['確', '##信']
Predictions: [17, 2]
Truth: 17
Final prediction: 17 

270992 270994
Tokens: ['で', '##き']
Predictions: [17, 2]
Truth: 17
Final prediction: 17 

271009 271011
Tokens: ['で', '##き']
Predictions: [17, 2]
Truth: 17
Final prediction: 17 

271019 271021
Tokens: ['嘲', '##笑']
Predictions: [9, 0]
Truth: 17
Final prediction: 0 

271024 271026
Tokens: ['番', '##組']
Predictions: [13, 0]
Truth: 9
Final prediction: 0 

271045 271048
Tokens: ['火', '##曜', '##日']
Predictions: [8, 18, 8]
Truth: 9
Final prediction: 8 

271052 271054
Tokens: ['歴', '##史']
Predictions: [2, 9]
Truth: 9
Final prediction: 9 

271092 271094
Tokens: ['年', '##間']
Predictions: [9, 0]
Truth: 9
Final prediction: 0 

271095 271097
Tokens: ['大', '##陸']
Predictions: [13, 9]
Truth: 9
Final prediction: 9 

271124 271127
Tokens: ['保', '

277842 277845
Tokens: ['意', '##図', '##的']
Predictions: [9, 9, 0]
Truth: 2
Final prediction: 9 

277879 277883
Tokens: ['造', '##山', '##運', '##動']
Predictions: [2, 2, 0, 9]
Truth: 9
Final prediction: 2 

277884 277886
Tokens: ['何', '##度']
Predictions: [10, 0]
Truth: 9
Final prediction: 0 

277899 277902
Tokens: ['中', '##心', '##部']
Predictions: [9, 2, 9]
Truth: 9
Final prediction: 9 

277972 277974
Tokens: ['こ', '##と']
Predictions: [8, 9]
Truth: 15
Final prediction: 8 

277981 277983
Tokens: ['打', '##倒']
Predictions: [17, 0]
Truth: 17
Final prediction: 0 

278016 278019
Tokens: ['合', '##法', '##化']
Predictions: [2, 10, 0]
Truth: 17
Final prediction: 0 

278027 278029
Tokens: ['全', '##て']
Predictions: [8, 13]
Truth: 9
Final prediction: 8 

278032 278034
Tokens: ['戻', '##っ']
Predictions: [17, 8]
Truth: 17
Final prediction: 8 

278044 278046
Tokens: ['残', '##っ']
Predictions: [17, 8]
Truth: 17
Final prediction: 8 

278049 278052
Tokens: ['政', '##治', '##犯']
Predictions: [9, 9, 0]
Truth: 9
Final

Final prediction: 5 

281345 281347
Tokens: ['모두', '##가']
Predictions: [12, 5]
Truth: 9
Final prediction: 12 

281347 281350
Tokens: ['그', '##것', '##에']
Predictions: [7, 9, 3]
Truth: 12
Final prediction: 9 

281358 281360
Tokens: ['아', '##니다']
Predictions: [8, 2]
Truth: 2
Final prediction: 8 

281361 281363
Tokens: ['폴', '##은']
Predictions: [13, 5]
Truth: 13
Final prediction: 5 

281386 281389
Tokens: ['너', '##희', '##들']
Predictions: [12, 12, 13]
Truth: 12
Final prediction: 12 

281391 281396
Tokens: ['알', '##아', '##차', '##렸', '##어']
Predictions: [17, 17, 17, 17, 8]
Truth: 17
Final prediction: 17 

281399 281402
Tokens: ['물', '##었다', '##고']
Predictions: [17, 17, 14]
Truth: 11
Final prediction: 17 

281405 281407
Tokens: ['년', '##에']
Predictions: [10, 3]
Truth: 9
Final prediction: 10 

281412 281415
Tokens: ['반', '##도에', '##서']
Predictions: [13, 13, 4]
Truth: 9
Final prediction: 13 

281417 281419
Tokens: ['러시아', '##의']
Predictions: [13, 11]
Truth: 13
Final prediction: 11 

281433 28143

Predictions: [9, 9, 6]
Truth: 9
Final prediction: 9 

284043 284046
Tokens: ['생', '##각', '##이']
Predictions: [9, 9, 5]
Truth: 9
Final prediction: 9 

284050 284053
Tokens: ['교', '##차', '##하고']
Predictions: [17, 17, 6]
Truth: 9
Final prediction: 17 

284053 284056
Tokens: ['갈', '##등', '##의']
Predictions: [9, 9, 11]
Truth: 9
Final prediction: 9 

284056 284060
Tokens: ['파', '##열', '##음', '##이']
Predictions: [9, 9, 9, 5]
Truth: 9
Final prediction: 9 

284066 284069
Tokens: ['주', '##변', '##에서']
Predictions: [9, 9, 4]
Truth: 9
Final prediction: 9 

284077 284080
Tokens: ['데', '##이터', '##와']
Predictions: [9, 9, 6]
Truth: 9
Final prediction: 9 

284087 284091
Tokens: ['불', '##일', '##치', '##하는']
Predictions: [17, 17, 2, 2]
Truth: 9
Final prediction: 17 

284102 284104
Tokens: ['이어', '##야']
Predictions: [17, 4]
Truth: 5
Final prediction: 17 

284109 284113
Tokens: ['경', '##제', '##학자', '##들은']
Predictions: [9, 9, 9, 5]
Truth: 9
Final prediction: 9 

284113 284117
Tokens: ['공', '##화', '##당', '##의

Final prediction: 2 

286642 286645
Tokens: ['순', '##진', '##해']
Predictions: [2, 2, 4]
Truth: 9
Final prediction: 2 

286650 286653
Tokens: ['최', '##악', '##의']
Predictions: [2, 2, 3]
Truth: 9
Final prediction: 2 

286653 286655
Tokens: ['경우', '##에는']
Predictions: [9, 5]
Truth: 9
Final prediction: 9 

286659 286662
Tokens: ['로', '##비', '##를']
Predictions: [9, 9, 5]
Truth: 9
Final prediction: 9 

286662 286664
Tokens: ['해', '##도']
Predictions: [17, 6]
Truth: 17
Final prediction: 17 

286664 286666
Tokens: ['봐', '##주는']
Predictions: [17, 7]
Truth: 17
Final prediction: 17 

286685 286688
Tokens: ['지', '##지', '##하고']
Predictions: [17, 17, 6]
Truth: 9
Final prediction: 17 

286688 286691
Tokens: ['말', '##고', '##를']
Predictions: [9, 9, 5]
Truth: 11
Final prediction: 9 

286691 286694
Tokens: ['떠', '##나', '##서']
Predictions: [17, 17, 4]
Truth: 17
Final prediction: 17 

286696 286698
Tokens: ['법', '##과']
Predictions: [9, 6]
Truth: 9
Final prediction: 9 

286704 286708
Tokens: ['도', '##입', '##되'

289319 289321
Tokens: ['것', '##과']
Predictions: [9, 6]
Truth: 9
Final prediction: 9 

289324 289326
Tokens: ['관련', '##이']
Predictions: [9, 3]
Truth: 9
Final prediction: 9 

289332 289334
Tokens: ['살', '##이']
Predictions: [9, 11]
Truth: 9
Final prediction: 9 

289335 289338
Tokens: ['저', '##팅', '##은']
Predictions: [13, 13, 5]
Truth: 13
Final prediction: 13 

289339 289341
Tokens: ['건', '##의']
Predictions: [9, 11]
Truth: 9
Final prediction: 9 

289343 289346
Tokens: ['혐', '##의', '##와']
Predictions: [9, 9, 6]
Truth: 9
Final prediction: 9 

289347 289349
Tokens: ['건', '##의']
Predictions: [9, 11]
Truth: 9
Final prediction: 9 

289355 289358
Tokens: ['혐', '##의', '##로']
Predictions: [9, 9, 15]
Truth: 9
Final prediction: 9 

289362 289365
Tokens: ['법', '##원에', '##서']
Predictions: [13, 13, 4]
Truth: 9
Final prediction: 13 

289365 289368
Tokens: ['재', '##판', '##을']
Predictions: [9, 9, 5]
Truth: 9
Final prediction: 9 

289372 289374
Tokens: ['내', '##가']
Predictions: [12, 5]
Truth: 12
Final predi

Predictions: [9, 9, 5]
Truth: 9
Final prediction: 9 

291898 291900
Tokens: ['영화', '##가']
Predictions: [13, 5]
Truth: 9
Final prediction: 5 

291900 291902
Tokens: ['설립', '##되었으며']
Predictions: [17, 3]
Truth: 9
Final prediction: 17 

291903 291905
Tokens: ['년', '##에는']
Predictions: [13, 3]
Truth: 9
Final prediction: 3 

291915 291917
Tokens: ['최고', '##의']
Predictions: [2, 3]
Truth: 9
Final prediction: 2 

291917 291920
Tokens: ['성', '##공', '##을']
Predictions: [9, 9, 5]
Truth: 9
Final prediction: 9 

291922 291924
Tokens: ['영화', '##는']
Predictions: [9, 5]
Truth: 9
Final prediction: 9 

291934 291936
Tokens: ['감독', '##한']
Predictions: [17, 3]
Truth: 9
Final prediction: 17 

291940 291942
Tokens: ['였', '##다']
Predictions: [17, 14]
Truth: 5
Final prediction: 17 

291950 291953
Tokens: ['조', '##약', '##이']
Predictions: [9, 9, 5]
Truth: 9
Final prediction: 9 

291953 291956
Tokens: ['체', '##결', '##되었으며']
Predictions: [17, 17, 15]
Truth: 9
Final prediction: 17 

291956 291958
Tokens: ['이', '##

294818 294821
Tokens: ['시', '##작', '##하면서']
Predictions: [17, 17, 15]
Truth: 9
Final prediction: 17 

294821 294824
Tokens: ['지', '##표', '##에']
Predictions: [9, 9, 3]
Truth: 9
Final prediction: 9 

294824 294827
Tokens: ['노', '##출', '##된']
Predictions: [17, 17, 2]
Truth: 9
Final prediction: 17 

294834 294837
Tokens: ['시', '##작', '##한다']
Predictions: [17, 17, 4]
Truth: 9
Final prediction: 17 

294838 294841
Tokens: ['풍', '##속', '##이']
Predictions: [9, 9, 5]
Truth: 9
Final prediction: 9 

294841 294845
Tokens: ['올', '##라', '##가', '##면서']
Predictions: [17, 17, 17, 4]
Truth: 17
Final prediction: 17 

294851 294853
Tokens: ['중', '##으로']
Predictions: [4, 3]
Truth: 9
Final prediction: 3 

294860 294862
Tokens: ['땅', '##에']
Predictions: [9, 3]
Truth: 9
Final prediction: 9 

294868 294870
Tokens: ['쳐', '##서']
Predictions: [17, 4]
Truth: 17
Final prediction: 17 

294872 294874
Tokens: ['중', '##으로']
Predictions: [4, 3]
Truth: 9
Final prediction: 3 

294877 294879
Tokens: ['되', '##며']
Predictions

Tokens: ['춘', '##분', '##과']
Predictions: [13, 13, 6]
Truth: 9
Final prediction: 13 

297514 297517
Tokens: ['신', '##월', '##에']
Predictions: [13, 13, 3]
Truth: 9
Final prediction: 13 

297518 297521
Tokens: ['여', '##름', '##과']
Predictions: [9, 9, 6]
Truth: 9
Final prediction: 9 

297521 297524
Tokens: ['겨', '##울', '##에']
Predictions: [9, 9, 3]
Truth: 9
Final prediction: 9 

297536 297538
Tokens: ['길', '##이는']
Predictions: [9, 5]
Truth: 9
Final prediction: 9 

297540 297542
Tokens: ['방', '##식으로']
Predictions: [9, 4]
Truth: 9
Final prediction: 9 

297542 297545
Tokens: ['측', '##정한', '##다']
Predictions: [17, 17, 12]
Truth: 9
Final prediction: 17 

297552 297555
Tokens: ['사', '##실', '##에']
Predictions: [9, 9, 3]
Truth: 9
Final prediction: 9 

297565 297569
Tokens: ['유', '##카', '##탄', '##에서']
Predictions: [13, 13, 13, 3]
Truth: 13
Final prediction: 13 

297572 297576
Tokens: ['시', '##스', '##템', '##을']
Predictions: [9, 9, 9, 5]
Truth: 9
Final prediction: 9 

297585 297588
Tokens: ['단', '##어',

Predictions: [17, 17, 4]
Truth: 11
Final prediction: 17 

300219 300221
Tokens: ['못', '##했다']
Predictions: [4, 14]
Truth: 17
Final prediction: 4 

300222 300227
Tokens: ['알', '##바', '##레', '##즈', '##로']
Predictions: [13, 13, 13, 13, 4]
Truth: 13
Final prediction: 13 

300233 300236
Tokens: ['분', '##야', '##에서']
Predictions: [9, 9, 4]
Truth: 9
Final prediction: 9 

300245 300249
Tokens: ['시', '##스', '##템', '##은']
Predictions: [9, 9, 9, 5]
Truth: 9
Final prediction: 9 

300256 300258
Tokens: ['작', '##전에서']
Predictions: [13, 9]
Truth: 9
Final prediction: 9 

300280 300282
Tokens: ['였', '##다']
Predictions: [5, 14]
Truth: 5
Final prediction: 5 

300283 300288
Tokens: ['알', '##바', '##레', '##즈', '##의']
Predictions: [13, 13, 13, 13, 2]
Truth: 13
Final prediction: 13 

300291 300295
Tokens: ['안', '##테', '##나', '##를']
Predictions: [9, 9, 9, 5]
Truth: 9
Final prediction: 9 

300295 300298
Tokens: ['사', '##용', '##해']
Predictions: [17, 17, 15]
Truth: 9
Final prediction: 17 

300306 300309
Tokens: ['

302846 302849
Tokens: ['관', '##심', '##이']
Predictions: [9, 9, 3]
Truth: 9
Final prediction: 9 

302865 302867
Tokens: ['독일', '##을']
Predictions: [13, 11]
Truth: 13
Final prediction: 11 

302870 302874
Tokens: ['히', '##틀', '##러', '##와']
Predictions: [13, 13, 13, 6]
Truth: 13
Final prediction: 13 

302876 302880
Tokens: ['관', '##계', '##자', '##들을']
Predictions: [9, 9, 9, 5]
Truth: 9
Final prediction: 9 

302884 302886
Tokens: ['군', '##이']
Predictions: [13, 3]
Truth: 9
Final prediction: 3 

302886 302889
Tokens: ['국', '##경', '##을']
Predictions: [13, 9, 11]
Truth: 9
Final prediction: 9 

302900 302903
Tokens: ['발', '##표', '##에서']
Predictions: [9, 9, 3]
Truth: 9
Final prediction: 9 

302903 302905
Tokens: ['독일', '##과']
Predictions: [13, 6]
Truth: 13
Final prediction: 13 

302908 302910
Tokens: ['모두', '##를']
Predictions: [13, 5]
Truth: 9
Final prediction: 5 

302921 302924
Tokens: ['수', '##비', '##대는']
Predictions: [9, 9, 13]
Truth: 9
Final prediction: 9 

302929 302932
Tokens: ['명', '##령', '#

Tokens: ['라', '##벨', '##이']
Predictions: [13, 13, 5]
Truth: 9
Final prediction: 13 

305742 305744
Tokens: ['소속', '##이']
Predictions: [9, 3]
Truth: 9
Final prediction: 9 

305744 305747
Tokens: ['아', '##니', '##며']
Predictions: [4, 4, 15]
Truth: 2
Final prediction: 4 

305747 305751
Tokens: ['유', '##튜', '##브', '##와']
Predictions: [13, 13, 13, 6]
Truth: 9
Final prediction: 13 

305751 305754
Tokens: ['계', '##약', '##이']
Predictions: [9, 9, 5]
Truth: 9
Final prediction: 9 

305757 305759
Tokens: ['경우', '##에']
Predictions: [9, 5]
Truth: 9
Final prediction: 9 

305760 305763
Tokens: ['신', '##호', '##가']
Predictions: [9, 9, 5]
Truth: 9
Final prediction: 9 

305774 305777
Tokens: ['물', '##질', '##은']
Predictions: [9, 9, 5]
Truth: 9
Final prediction: 9 

305788 305790
Tokens: ['폐', '##를']
Predictions: [9, 11]
Truth: 9
Final prediction: 9 

305798 305801
Tokens: ['청', '##소', '##년']
Predictions: [2, 2, 9]
Truth: 9
Final prediction: 2 

305802 305805
Tokens: ['발', '##달', '##에']
Predictions: [9, 9, 3

Truth: 9
Final prediction: 9 

308519 308522
Tokens: ['그', '##곳', '##은']
Predictions: [7, 9, 5]
Truth: 12
Final prediction: 9 

308528 308531
Tokens: ['주', '##민', '##들에게']
Predictions: [9, 9, 11]
Truth: 9
Final prediction: 9 

308531 308534
Tokens: ['안', '##식', '##과']
Predictions: [9, 9, 6]
Truth: 9
Final prediction: 9 

308536 308538
Tokens: ['공', '##간이']
Predictions: [9, 13]
Truth: 9
Final prediction: 9 

308539 308541
Tokens: ['준', '##다']
Predictions: [17, 14]
Truth: 17
Final prediction: 17 

308557 308559
Tokens: ['년', '##대']
Predictions: [9, 11]
Truth: 9
Final prediction: 9 

308579 308582
Tokens: ['옥', '##상', '##에']
Predictions: [9, 9, 3]
Truth: 9
Final prediction: 9 

308587 308589
Tokens: ['안', '##에']
Predictions: [5, 3]
Truth: 9
Final prediction: 3 

308597 308601
Tokens: ['기', '##념', '##관', '##이']
Predictions: [9, 9, 9, 3]
Truth: 9
Final prediction: 9 

308604 308607
Tokens: ['년', '##대', '##부터']
Predictions: [9, 9, 4]
Truth: 9
Final prediction: 9 

308608 308610
Tokens: ['도시'

311243 311245
Tokens: ['있', '##었으며']
Predictions: [17, 15]
Truth: 2
Final prediction: 17 

311246 311249
Tokens: ['친', '##분', '##은']
Predictions: [9, 9, 5]
Truth: 9
Final prediction: 9 

311249 311252
Tokens: ['수', '##년', '##간']
Predictions: [10, 9, 4]
Truth: 9
Final prediction: 9 

311252 311256
Tokens: ['지', '##속', '##됐', '##다']
Predictions: [17, 17, 17, 14]
Truth: 9
Final prediction: 17 

311271 311276
Tokens: ['모', '##리', '##코', '##네', '##의']
Predictions: [13, 13, 13, 13, 3]
Truth: 13
Final prediction: 13 

311281 311283
Tokens: ['작곡', '##에']
Predictions: [9, 3]
Truth: 9
Final prediction: 9 

311283 311287
Tokens: ['협', '##력', '##했', '##는데']
Predictions: [17, 17, 17, 15]
Truth: 9
Final prediction: 17 

311292 311294
Tokens: ['일부', '##는']
Predictions: [2, 5]
Truth: 9
Final prediction: 2 

311305 311308
Tokens: ['우', '##정', '##은']
Predictions: [9, 9, 5]
Truth: 9
Final prediction: 9 

311315 311318
Tokens: ['협', '##업', '##으로']
Predictions: [9, 9, 4]
Truth: 9
Final prediction: 9 

3113

327184 327186
Tokens: ['по', '##чему']
Predictions: [8, 15]
Truth: 4
Final prediction: 8 

327198 327200
Tokens: ['должны', '##м']
Predictions: [2, 4]
Truth: 2
Final prediction: 2 

327409 327411
Tokens: ['Само', '##й']
Predictions: [12, 4]
Truth: 2
Final prediction: 12 

327439 327441
Tokens: ['како', '##го']
Predictions: [15, 2]
Truth: 7
Final prediction: 2 

327932 327934
Tokens: ['н', '##.']
Predictions: [18, 16]
Truth: 7
Final prediction: 16 

327988 327992
Tokens: ['Р', '##аз', '##ру', '##шение']
Predictions: [9, 17, 17, 9]
Truth: 9
Final prediction: 9 

328170 328172
Tokens: ['исто', '##щения']
Predictions: [17, 9]
Truth: 9
Final prediction: 17 

328207 328210
Tokens: ['раз', '##ру', '##шении']
Predictions: [17, 17, 9]
Truth: 9
Final prediction: 17 

328363 328367
Tokens: ['ос', '##тав', '##шей', '##ся']
Predictions: [17, 17, 17, 12]
Truth: 17
Final prediction: 17 

328638 328640
Tokens: ['по', '##явления']
Predictions: [17, 9]
Truth: 9
Final prediction: 17 

328655 328658
Token

351323 351326
Tokens: ['u', '##veden', '##ej']
Predictions: [17, 17, 2]
Truth: 2
Final prediction: 17 

351448 351453
Tokens: ['o', '##hro', '##zu', '##jú', '##cu']
Predictions: [17, 17, 17, 17, 2]
Truth: 2
Final prediction: 17 

351474 351477
Tokens: ['za', '##ru', '##čený']
Predictions: [17, 17, 2]
Truth: 2
Final prediction: 17 

351596 351601
Tokens: ['ne', '##zn', '##áš', '##aj', '##ú']
Predictions: [4, 17, 17, 17, 17]
Truth: 17
Final prediction: 17 

351735 351739
Tokens: ['prie', '##mys', '##eln', '##e']
Predictions: [2, 2, 4, 4]
Truth: 4
Final prediction: 2 

351797 351801
Tokens: ['v', '##y', '##š', '##šie']
Predictions: [2, 2, 4, 4]
Truth: 4
Final prediction: 2 

351801 351804
Tokens: ['u', '##veden', '##ých']
Predictions: [17, 17, 2]
Truth: 2
Final prediction: 17 

351858 351860
Tokens: ['člen', '##om']
Predictions: [9, 13]
Truth: 9
Final prediction: 9 

352010 352012
Tokens: ['dl', '##hou']
Predictions: [2, 17]
Truth: 2
Final prediction: 17 

352038 352042
Tokens: ['meg', '#

369216 369220
Tokens: ['Pe', '##zin', '##čani', '##a']
Predictions: [13, 13, 13, 9]
Truth: 13
Final prediction: 13 

369264 369268
Tokens: ['Kar', '##pat', '##sk', '##í']
Predictions: [13, 13, 2, 2]
Truth: 2
Final prediction: 2 

369326 369328
Tokens: ['Bo', '##zin']
Predictions: [13, 18]
Truth: 18
Final prediction: 18 

369450 369452
Tokens: ['nam', '##iesto']
Predictions: [4, 3]
Truth: 3
Final prediction: 3 

369470 369474
Tokens: ['H', '##lin', '##ík', '##ová']
Predictions: [9, 9, 9, 2]
Truth: 2
Final prediction: 9 

369525 369529
Tokens: ['ne', '##pred', '##stavu', '##jú']
Predictions: [5, 17, 17, 17]
Truth: 17
Final prediction: 17 

369681 369685
Tokens: ['ul', '##o', '##ženi', '##a']
Predictions: [17, 17, 17, 3]
Truth: 9
Final prediction: 17 

369716 369720
Tokens: ['k', '##ód', '##ovan', '##ia']
Predictions: [17, 17, 17, 9]
Truth: 9
Final prediction: 17 

369738 369741
Tokens: ['z', '##vuk', '##ových']
Predictions: [9, 9, 2]
Truth: 2
Final prediction: 9 

369750 369752
Tokens: [

Predictions: [6, 4]
Truth: 3
Final prediction: 4 

376524 376530
Tokens: ['ก', '##าร', '##เ', '##พ', '##ิ', '##่ม']
Predictions: [13, 13, 17, 17, 17, 17]
Truth: 17
Final prediction: 17 

376534 376538
Tokens: ['ใ', '##ห', '##ม', '##่']
Predictions: [5, 17, 17, 13]
Truth: 2
Final prediction: 17 

376538 376544
Tokens: ['เ', '##ก', '##ี', '##่', '##ย', '##ว']
Predictions: [13, 9, 9, 9, 9, 13]
Truth: 17
Final prediction: 9 

376550 376561
Tokens: ['ก', '##าร', '##เ', '##ป', '##ลี', '##่', '##ยน', '##แ', '##ป', '##ล', '##ง']
Predictions: [13, 13, 17, 17, 17, 17, 17, 17, 17, 9, 9]
Truth: 17
Final prediction: 17 

376562 376566
Tokens: ['ส', '##ภ', '##า', '##พ']
Predictions: [13, 13, 9, 13]
Truth: 9
Final prediction: 13 

376576 376581
Tokens: ['ค', '##ร', '##อบ', '##ง', '##ำ']
Predictions: [17, 17, 17, 9, 9]
Truth: 17
Final prediction: 17 

376590 376595
Tokens: ['ก', '##าร', '##ส', '##ู', '##ญ']
Predictions: [13, 13, 13, 9, 13]
Truth: 17
Final prediction: 13 

376606 376610
Tokens: ['ช', '

Truth: 17
Final prediction: 17 

381042 381044
Tokens: ['น', '##ัก']
Predictions: [10, 9]
Truth: 9
Final prediction: 9 

381055 381062
Tokens: ['อ', '##ย', '##่า', '##ง', '##น', '##้', '##อย']
Predictions: [13, 4, 4, 4, 4, 18, 18]
Truth: 2
Final prediction: 4 

381075 381082
Tokens: ['ป', '##ฏ', '##ิ', '##บ', '##ั', '##ติ', '##การ']
Predictions: [9, 13, 2, 2, 2, 2, 9]
Truth: 17
Final prediction: 2 

381098 381104
Tokens: ['ร', '##ะ', '##เ', '##บ', '##ีย', '##บ']
Predictions: [9, 9, 17, 9, 9, 9]
Truth: 9
Final prediction: 9 

381137 381142
Tokens: ['ก', '##าร', '##เ', '##ข', '##้า']
Predictions: [9, 9, 17, 17, 17]
Truth: 17
Final prediction: 17 

381168 381173
Tokens: ['ก', '##าร', '##เ', '##ข', '##้า']
Predictions: [9, 9, 17, 17, 17]
Truth: 17
Final prediction: 17 

381188 381191
Tokens: ['เ', '##ง', '##ิน']
Predictions: [13, 9, 13]
Truth: 9
Final prediction: 13 

381198 381200
Tokens: ['น', '##ัก']
Predictions: [13, 9]
Truth: 9
Final prediction: 9 

381222 381230
Tokens: ['ก', '##าร',

385907 385909
Tokens: ['ม', '##ี']
Predictions: [5, 17]
Truth: 17
Final prediction: 17 

385913 385917
Tokens: ['เ', '##ดี', '##ย', '##ว']
Predictions: [13, 2, 13, 13]
Truth: 2
Final prediction: 13 

385918 385920
Tokens: ['จ', '##ะ']
Predictions: [13, 5]
Truth: 17
Final prediction: 5 

385925 385928
Tokens: ['ค', '##ื', '##น']
Predictions: [13, 9, 13]
Truth: 17
Final prediction: 13 

385929 385936
Tokens: ['ก', '##าร', '##เ', '##อ', '##า', '##ช', '##นะ']
Predictions: [13, 13, 13, 13, 13, 17, 17]
Truth: 17
Final prediction: 13 

385962 385964
Tokens: ['ไ', '##ด้']
Predictions: [5, 13]
Truth: 17
Final prediction: 13 

385974 385978
Tokens: ['ส', '##าม', '##าร', '##ถ']
Predictions: [13, 17, 2, 13]
Truth: 17
Final prediction: 13 

385978 385981
Tokens: ['ส', '##น', '##อง']
Predictions: [17, 17, 13]
Truth: 17
Final prediction: 17 

385981 385987
Tokens: ['ค', '##ว', '##าม', '##ต', '##้อง', '##การ']
Predictions: [13, 9, 13, 17, 17, 13]
Truth: 17
Final prediction: 13 

385996 385999
Tokens: 

390118 390121
Tokens: ['เ', '##ช', '##ื่อ']
Predictions: [14, 9, 9]
Truth: 17
Final prediction: 9 

390121 390123
Tokens: ['ว', '##่า']
Predictions: [9, 8]
Truth: 3
Final prediction: 8 

390123 390132
Tokens: ['ก', '##าร', '##แ', '##ท', '##ร', '##ก', '##แ', '##ซ', '##ง']
Predictions: [13, 9, 17, 17, 17, 17, 2, 2, 13]
Truth: 17
Final prediction: 17 

390132 390135
Tokens: ['จ', '##ำ', '##เป็น']
Predictions: [13, 9, 17]
Truth: 2
Final prediction: 9 

390135 390137
Tokens: ['ต', '##่อ']
Predictions: [13, 17]
Truth: 3
Final prediction: 17 

390137 390142
Tokens: ['ก', '##าร', '##ป', '##้อง', '##กัน']
Predictions: [13, 9, 17, 17, 13]
Truth: 17
Final prediction: 13 

390142 390145
Tokens: ['ไ', '##ม', '##่']
Predictions: [12, 5, 12]
Truth: 11
Final prediction: 12 

390145 390148
Tokens: ['ใ', '##ห', '##้']
Predictions: [5, 17, 17]
Truth: 17
Final prediction: 17 

390178 390182
Tokens: ['ต', '##่อ', '##ไ', '##ป']
Predictions: [13, 9, 9, 9]
Truth: 4
Final prediction: 9 

390187 390191
Tokens: 

Tokens: ['ห', '##น', '##ึ', '##่ง']
Predictions: [13, 2, 2, 2]
Truth: 10
Final prediction: 2 

393904 393907
Tokens: ['ไ', '##ม', '##่']
Predictions: [5, 13, 13]
Truth: 11
Final prediction: 13 

393907 393910
Tokens: ['ร', '##ู', '##้']
Predictions: [13, 17, 13]
Truth: 17
Final prediction: 13 

393910 393915
Tokens: ['เ', '##ห', '##ม', '##ือ', '##น']
Predictions: [13, 2, 2, 2, 2]
Truth: 5
Final prediction: 2 

393915 393917
Tokens: ['ก', '##ัน']
Predictions: [13, 12]
Truth: 12
Final prediction: 12 

393917 393919
Tokens: ['ว', '##่า']
Predictions: [17, 8]
Truth: 3
Final prediction: 8 

393919 393923
Tokens: ['ท', '##ำ', '##ไ', '##ม']
Predictions: [17, 17, 17, 13]
Truth: 4
Final prediction: 17 

393926 393929
Tokens: ['เ', '##ล', '##ือก']
Predictions: [13, 9, 9]
Truth: 17
Final prediction: 9 

393955 393959
Tokens: ['ป', '##ร', '##าก', '##ฏ']
Predictions: [13, 13, 17, 17]
Truth: 17
Final prediction: 17 

393972 393978
Tokens: ['อ', '##ย', '##่า', '##ง', '##มา', '##ก']
Predictions: [13, 

Tokens: ['แ', '##ต', '##ก', '##ต', '##่า', '##ง']
Predictions: [6, 2, 2, 2, 2, 9]
Truth: 17
Final prediction: 2 

399072 399078
Tokens: ['ก', '##ร', '##ะ', '##บ', '##วน', '##การ']
Predictions: [17, 17, 17, 17, 17, 9]
Truth: 9
Final prediction: 17 

399078 399083
Tokens: ['ร', '##ะ', '##ห', '##ว่า', '##ง']
Predictions: [10, 17, 17, 17, 17]
Truth: 3
Final prediction: 17 

399097 399101
Tokens: ['ก', '##าร', '##จ', '##ัด']
Predictions: [9, 9, 17, 17]
Truth: 17
Final prediction: 9 

399113 399117
Tokens: ['ก', '##าร', '##จ', '##ัด']
Predictions: [9, 9, 17, 17]
Truth: 17
Final prediction: 9 

399135 399142
Tokens: ['ก', '##าร', '##ย', '##อ', '##ม', '##ร', '##ับ']
Predictions: [6, 15, 17, 17, 17, 17, 17]
Truth: 17
Final prediction: 17 

399142 399153
Tokens: ['อ', '##ย', '##่า', '##ง', '##ก', '##ว', '##้า', '##ง', '##ข', '##ว', '##าง']
Predictions: [13, 9, 9, 9, 17, 2, 2, 2, 2, 17, 17]
Truth: 2
Final prediction: 2 

399153 399155
Tokens: ['ที่', '##สุด']
Predictions: [9, 2]
Truth: 4
Final pr

Tokens: ['พ', '##ื', '##้น']
Predictions: [13, 9, 9]
Truth: 9
Final prediction: 9 

404741 404743
Tokens: ['ม', '##ี']
Predictions: [5, 17]
Truth: 17
Final prediction: 17 

404743 404751
Tokens: ['พ', '##ื', '##ช', '##พ', '##ัน', '##ธ', '##ุ', '##์']
Predictions: [13, 9, 9, 13, 9, 13, 13, 13]
Truth: 9
Final prediction: 13 

404751 404757
Tokens: ['ป', '##ก', '##ค', '##ล', '##ุ', '##ม']
Predictions: [13, 13, 13, 13, 9, 9]
Truth: 17
Final prediction: 13 

404764 404766
Tokens: ['จ', '##ะ']
Predictions: [8, 14]
Truth: 17
Final prediction: 8 

404766 404768
Tokens: ['ก', '##่อ']
Predictions: [17, 13]
Truth: 17
Final prediction: 17 

404776 404779
Tokens: ['แ', '##น', '##ว']
Predictions: [9, 9, 13]
Truth: 9
Final prediction: 9 

404792 404794
Tokens: ['จ', '##ะ']
Predictions: [13, 14]
Truth: 17
Final prediction: 13 

404806 404809
Tokens: ['ซ', '##ึ', '##่ง']
Predictions: [13, 15, 15]
Truth: 7
Final prediction: 15 

404829 404831
Tokens: ['แ', '##ล้ว']
Predictions: [4, 13]
Truth: 11
Final p

Truth: 2
Final prediction: 9 

411104 411107
Tokens: ['แ', '##ต', '##่']
Predictions: [6, 5, 3]
Truth: 6
Final prediction: 3 

411120 411122
Tokens: ['จ', '##ะ']
Predictions: [13, 5]
Truth: 17
Final prediction: 5 

411126 411128
Tokens: ['ก', '##็']
Predictions: [14, 12]
Truth: 4
Final prediction: 12 

411131 411135
Tokens: ['ส', '##าม', '##าร', '##ถ']
Predictions: [5, 17, 2, 2]
Truth: 17
Final prediction: 2 

411154 411157
Tokens: ['เ', '##ร', '##ื่อง']
Predictions: [13, 9, 9]
Truth: 9
Final prediction: 9 

411169 411172
Tokens: ['เ', '##ร', '##ื่อง']
Predictions: [13, 9, 9]
Truth: 9
Final prediction: 9 

411172 411178
Tokens: ['ต', '##ี', '##พ', '##ิม', '##พ', '##์']
Predictions: [13, 9, 9, 9, 9, 9]
Truth: 17
Final prediction: 9 

411186 411191
Tokens: ['ร', '##ะ', '##ห', '##ว่า', '##ง']
Predictions: [10, 4, 17, 4, 3]
Truth: 3
Final prediction: 4 

411212 411214
Tokens: ['ค', '##ิด']
Predictions: [17, 9]
Truth: 17
Final prediction: 17 

411215 411219
Tokens: ['อ', '##นา', '##ค', '##ต

Predictions: [9, 9, 9, 9, 9, 9, 9, 17]
Truth: 17
Final prediction: 9 

416357 416359
Tokens: ['ม', '##ี']
Predictions: [5, 17]
Truth: 17
Final prediction: 17 

416363 416368
Tokens: ['อ', '##ย', '##่า', '##ง', '##เป็น']
Predictions: [9, 4, 4, 4, 17]
Truth: 5
Final prediction: 4 

416368 416371
Tokens: ['ท', '##าง', '##การ']
Predictions: [13, 2, 9]
Truth: 9
Final prediction: 9 

416373 416379
Tokens: ['ป', '##ร', '##า', '##ศ', '##จ', '##าก']
Predictions: [9, 9, 9, 9, 17, 3]
Truth: 17
Final prediction: 9 

416379 416388
Tokens: ['ค', '##ว', '##าม', '##เ', '##ห', '##็', '##น', '##ช', '##อบ']
Predictions: [9, 9, 9, 17, 9, 9, 9, 9, 17]
Truth: 17
Final prediction: 9 

416391 416394
Tokens: ['เ', '##ข', '##า']
Predictions: [13, 9, 9]
Truth: 12
Final prediction: 9 

416408 416414
Tokens: ['เ', '##ห', '##ต', '##ุ', '##การ', '##ณ์']
Predictions: [13, 9, 9, 9, 9, 9]
Truth: 9
Final prediction: 9 

416452 416460
Tokens: ['ค', '##ว', '##าม', '##ต', '##ก', '##ต', '##่', '##ำ']
Predictions: [13, 9, 9,

422425 422427
Tokens: ['จ', '##ะ']
Predictions: [8, 5]
Truth: 17
Final prediction: 8 

422435 422438
Tokens: ['เ', '##ข', '##้า']
Predictions: [13, 13, 17]
Truth: 11
Final prediction: 13 

422503 422506
Tokens: ['อ', '##ย', '##ู่']
Predictions: [6, 17, 17]
Truth: 11
Final prediction: 17 

422554 422558
Tokens: ['เ', '##ก', '##ือ', '##บ']
Predictions: [13, 9, 9, 9]
Truth: 4
Final prediction: 9 

422558 422560
Tokens: ['จ', '##ะ']
Predictions: [8, 5]
Truth: 17
Final prediction: 8 

422582 422591
Tokens: ['ก', '##าร', '##เ', '##ส', '##ีย', '##ช', '##ี', '##ว', '##ิต']
Predictions: [9, 9, 13, 17, 9, 9, 9, 9, 9]
Truth: 17
Final prediction: 9 

422599 422602
Tokens: ['เ', '##ร', '##ื่อง']
Predictions: [13, 9, 9]
Truth: 9
Final prediction: 9 

422602 422605
Tokens: ['อ', '##ี', '##ก']
Predictions: [10, 10, 8]
Truth: 7
Final prediction: 10 

422620 422628
Tokens: ['เ', '##ก', '##ี', '##่', '##ย', '##ว', '##ข', '##้อง']
Predictions: [13, 9, 2, 2, 9, 9, 9, 9]
Truth: 17
Final prediction: 9 

4226

428255 428257
Tokens: ['ม', '##ี']
Predictions: [5, 17]
Truth: 17
Final prediction: 17 

428257 428265
Tokens: ['ค', '##ว', '##าม', '##ส', '##ำ', '##ค', '##ั', '##ญ']
Predictions: [13, 9, 9, 2, 2, 2, 2, 2]
Truth: 2
Final prediction: 2 

428286 428292
Tokens: ['เ', '##ร', '##ิ', '##่ม', '##ต', '##้น']
Predictions: [17, 17, 17, 17, 9, 9]
Truth: 17
Final prediction: 17 

428308 428310
Tokens: ['ก', '##ัน']
Predictions: [13, 12]
Truth: 12
Final prediction: 12 

428313 428318
Tokens: ['เ', '##ห', '##ม', '##ือ', '##น']
Predictions: [13, 13, 2, 13, 13]
Truth: 17
Final prediction: 13 

428353 428355
Tokens: ['ค', '##ู่']
Predictions: [13, 9]
Truth: 9
Final prediction: 9 

428374 428378
Tokens: ['ไ', '##ด้', '##ร', '##ับ']
Predictions: [5, 17, 17, 17]
Truth: 17
Final prediction: 17 

428386 428391
Tokens: ['ย', '##ื', '##น', '##ย', '##ัน']
Predictions: [17, 13, 17, 13, 13]
Truth: 17
Final prediction: 13 

428391 428393
Tokens: ['ว', '##่า']
Predictions: [13, 8]
Truth: 3
Final prediction: 8 

42

Tokens: ['ไ', '##ด้']
Predictions: [5, 17]
Truth: 11
Final prediction: 17 

433558 433565
Tokens: ['ก', '##าร', '##ต', '##ก', '##แ', '##ต', '##่ง']
Predictions: [13, 13, 17, 17, 17, 17, 17]
Truth: 17
Final prediction: 17 

433565 433569
Tokens: ['ใ', '##ห', '##ม', '##่']
Predictions: [13, 17, 17, 3]
Truth: 2
Final prediction: 17 

433576 433578
Tokens: ['ส', '##าม']
Predictions: [13, 2]
Truth: 10
Final prediction: 2 

433578 433581
Tokens: ['ส', '##ิ', '##บ']
Predictions: [13, 2, 2]
Truth: 10
Final prediction: 2 

433582 433584
Tokens: ['ม', '##ี']
Predictions: [13, 17]
Truth: 17
Final prediction: 17 

433590 433596
Tokens: ['แ', '##ต', '##ก', '##ต', '##่า', '##ง']
Predictions: [5, 2, 2, 2, 2, 9]
Truth: 17
Final prediction: 2 

433598 433604
Tokens: ['เ', '##ก', '##ี', '##่', '##ย', '##ว']
Predictions: [13, 9, 9, 9, 9, 9]
Truth: 17
Final prediction: 9 

433647 433652
Tokens: ['ค', '##ว', '##าม', '##ยา', '##ว']
Predictions: [9, 9, 9, 13, 13]
Truth: 2
Final prediction: 9 

433652 433654


439194 439196
Tokens: ['เ', '##กิด']
Predictions: [0, 9]
Truth: 17
Final prediction: 0 

439225 439233
Tokens: ['ม', '##ั', '##่น', '##พ', '##ร', '##ะ', '##ท', '##ัย']
Predictions: [13, 8, 8, 13, 13, 13, 13, 13]
Truth: 17
Final prediction: 13 

439235 439244
Tokens: ['ค', '##ว', '##าม', '##ส', '##ำ', '##เ', '##ร', '##็', '##จ']
Predictions: [13, 9, 9, 17, 17, 2, 2, 2, 2]
Truth: 17
Final prediction: 2 

439252 439254
Tokens: ['จ', '##ะ']
Predictions: [13, 8]
Truth: 17
Final prediction: 8 

439258 439261
Tokens: ['ซ', '##ึ', '##่ง']
Predictions: [13, 8, 8]
Truth: 3
Final prediction: 8 

439276 439279
Tokens: ['แ', '##ต', '##่']
Predictions: [6, 8, 15]
Truth: 6
Final prediction: 8 

439316 439319
Tokens: ['น', '##าย', '##ก']
Predictions: [13, 9, 13]
Truth: 9
Final prediction: 13 

439319 439324
Tokens: ['ร', '##ั', '##ฐ', '##ม', '##นตรี']
Predictions: [13, 13, 9, 9, 9]
Truth: 9
Final prediction: 9 

439325 439329
Tokens: ['ใ', '##ห', '##ม', '##่']
Predictions: [5, 17, 17, 13]
Truth: 2
Fin

Truth: 5
Final prediction: 9 

443613 443617
Tokens: ['Pe', '##kin', "##'", '##in']
Predictions: [13, 13, 13, 11]
Truth: 13
Final prediction: 13 

443623 443625
Tokens: ['alanı', '##ndaki']
Predictions: [9, 18]
Truth: 1
Final prediction: 9 

443625 443627
Tokens: ['alanı', '##nda']
Predictions: [9, 4]
Truth: 9
Final prediction: 9 

443628 443631
Tokens: ['h', '##ır', '##sını']
Predictions: [9, 9, 12]
Truth: 9
Final prediction: 9 

443631 443634
Tokens: ['ser', '##gile', '##mesi']
Predictions: [17, 17, 12]
Truth: 9
Final prediction: 17 

443656 443658
Tokens: ['bölümü', '##n']
Predictions: [9, 11]
Truth: 9
Final prediction: 9 

443658 443661
Tokens: ['kap', '##atı', '##lma']
Predictions: [17, 17, 9]
Truth: 9
Final prediction: 17 

443670 443673
Tokens: ['ya', '##zı', '##k']
Predictions: [17, 17, 8]
Truth: 9
Final prediction: 17 

443681 443684
Tokens: ['tale', '##biy', '##le']
Predictions: [9, 9, 4]
Truth: 9
Final prediction: 9 

443687 443692
Tokens: ['e', '##ğ', '##len', '##cesi', '##

Final prediction: 17 

447280 447282
Tokens: ['dışında', '##ki']
Predictions: [4, 12]
Truth: 1
Final prediction: 4 

447286 447289
Tokens: ['kas', '##aba', '##sından']
Predictions: [9, 9, 3]
Truth: 9
Final prediction: 9 

447297 447299
Tokens: ['merkezi', '##ni']
Predictions: [9, 11]
Truth: 9
Final prediction: 9 

447318 447321
Tokens: ['kan', '##unun', '##u']
Predictions: [9, 9, 11]
Truth: 9
Final prediction: 9 

447322 447324
Tokens: ['etti', '##kten']
Predictions: [17, 15]
Truth: 9
Final prediction: 17 

447327 447329
Tokens: ['insanlar', '##a']
Predictions: [9, 3]
Truth: 9
Final prediction: 9 

447331 447335
Tokens: ['ya', '##park', '##en', '##ki']
Predictions: [17, 17, 17, 12]
Truth: 1
Final prediction: 17 

447339 447343
Tokens: ['zo', '##rl', '##uk', '##ları']
Predictions: [9, 9, 9, 4]
Truth: 9
Final prediction: 9 

447345 447349
Tokens: ['a', '##şa', '##cak', '##larını']
Predictions: [17, 17, 9, 12]
Truth: 9
Final prediction: 17 

447349 447353
Tokens: ['ö', '##ğ', '##ret', '##

Predictions: [9, 9, 9, 17]
Truth: 1
Final prediction: 9 

450931 450934
Tokens: ['saha', '##mı', '##zda']
Predictions: [9, 9, 4]
Truth: 9
Final prediction: 9 

450937 450942
Tokens: ['kay', '##bett', '##iği', '##mi', '##z']
Predictions: [17, 17, 12, 12, 12]
Truth: 9
Final prediction: 12 

450966 450968
Tokens: ['oynadı', '##ğında']
Predictions: [17, 15]
Truth: 9
Final prediction: 17 

450969 450972
Tokens: ['kes', '##in', '##likle']
Predictions: [9, 9, 4]
Truth: 9
Final prediction: 9 

450972 450975
Tokens: ['b', '##öyle', '##ydi']
Predictions: [2, 2, 17]
Truth: 1
Final prediction: 2 

450975 450977
Tokens: ['b', '##öyle']
Predictions: [4, 2]
Truth: 2
Final prediction: 2 

450985 450987
Tokens: ['ray', '##ından']
Predictions: [9, 3]
Truth: 9
Final prediction: 9 

450994 450998
Tokens: ['ce', '##va', '##plar', '##ını']
Predictions: [9, 9, 9, 11]
Truth: 9
Final prediction: 9 

450998 451002
Tokens: ['ar', '##ı', '##yor', '##muş']
Predictions: [17, 17, 4, 4]
Truth: 17
Final prediction: 17

454802 454806
Tokens: ['Pasifik', "##'", '##tek', '##i']
Predictions: [13, 13, 2, 2]
Truth: 1
Final prediction: 2 

454806 454809
Tokens: ['Pasifik', "##'", '##te']
Predictions: [13, 13, 18]
Truth: 13
Final prediction: 13 

454816 454821
Tokens: ['Ok', '##yan', '##usu', "##'", '##ndaki']
Predictions: [13, 13, 13, 13, 18]
Truth: 1
Final prediction: 13 

454821 454826
Tokens: ['Ok', '##yan', '##usu', "##'", '##nda']
Predictions: [13, 13, 13, 0, 4]
Truth: 9
Final prediction: 13 

454864 454866
Tokens: ['ada', '##sına']
Predictions: [9, 3]
Truth: 9
Final prediction: 9 

454867 454870
Tokens: ['Meksika', "##'", '##nın']
Predictions: [13, 13, 11]
Truth: 13
Final prediction: 13 

454872 454877
Tokens: ['k', '##ı', '##yıl', '##arın', '##daki']
Predictions: [9, 9, 9, 9, 3]
Truth: 1
Final prediction: 9 

454877 454882
Tokens: ['k', '##ı', '##yıl', '##arın', '##da']
Predictions: [9, 9, 9, 9, 3]
Truth: 9
Final prediction: 9 

454898 454902
Tokens: ['it', '##hala', '##tı', '##na']
Predictions: [9, 

458555 458557
Tokens: ['bölge', '##lerini']
Predictions: [9, 12]
Truth: 9
Final prediction: 9 

458566 458570
Tokens: ['Deniz', '##i', '##"', '##den']
Predictions: [13, 13, 13, 3]
Truth: 9
Final prediction: 13 

458578 458580
Tokens: ['kanal', '##ın']
Predictions: [13, 11]
Truth: 9
Final prediction: 11 

458580 458583
Tokens: ['ku', '##zey', '##inde']
Predictions: [9, 9, 4]
Truth: 9
Final prediction: 9 

458587 458591
Tokens: ['Deniz', '##i', '##"', '##ni']
Predictions: [13, 13, 13, 11]
Truth: 9
Final prediction: 13 

458598 458600
Tokens: ['kanal', '##ının']
Predictions: [13, 11]
Truth: 9
Final prediction: 11 

458626 458629
Tokens: ['sul', '##arın', '##a']
Predictions: [13, 13, 3]
Truth: 9
Final prediction: 13 

458636 458638
Tokens: ['bu', '##lma']
Predictions: [17, 9]
Truth: 9
Final prediction: 17 

458638 458642
Tokens: ['ara', '##yı', '##şı', '##yla']
Predictions: [9, 9, 9, 4]
Truth: 9
Final prediction: 9 

458644 458646
Tokens: ['gitt', '##iği']
Predictions: [17, 12]
Truth: 2
Fi

462430 462432
Tokens: ['döneminde', '##ki']
Predictions: [4, 11]
Truth: 1
Final prediction: 11 

462441 462444
Tokens: ['tar', '##ih', '##ten']
Predictions: [9, 9, 17]
Truth: 9
Final prediction: 9 

462449 462451
Tokens: ['ç', '##ünkü']
Predictions: [4, 15]
Truth: 4
Final prediction: 4 

462451 462453
Tokens: ['klasik', '##lerin']
Predictions: [9, 13]
Truth: 9
Final prediction: 9 

462458 462463
Tokens: ['da', '##vra', '##nı', '##şları', '##na']
Predictions: [9, 9, 9, 9, 11]
Truth: 9
Final prediction: 9 

462473 462475
Tokens: ['sun', '##duğu']
Predictions: [17, 12]
Truth: 9
Final prediction: 17 

462491 462493
Tokens: ['kadın', '##ların']
Predictions: [9, 12]
Truth: 9
Final prediction: 9 

462499 462501
Tokens: ['roller', '##ini']
Predictions: [9, 12]
Truth: 9
Final prediction: 9 

462501 462504
Tokens: ['kap', '##say', '##an']
Predictions: [17, 17, 2]
Truth: 2
Final prediction: 17 

462518 462520
Tokens: ['bilgi', '##lerin']
Predictions: [9, 12]
Truth: 9
Final prediction: 9 

462524 

466510 466513
Tokens: ['Sky', '##lar', '##k']
Predictions: [2, 13, 13]
Truth: 13
Final prediction: 13 

466513 466515
Tokens: ['roman', '##larının']
Predictions: [9, 12]
Truth: 9
Final prediction: 9 

466523 466527
Tokens: ['tek', '##nol', '##oji', '##lerinin']
Predictions: [9, 9, 9, 12]
Truth: 9
Final prediction: 9 

466527 466531
Tokens: ['ön', '##cü', '##ller', '##iyle']
Predictions: [9, 9, 9, 3]
Truth: 9
Final prediction: 9 

466531 466533
Tokens: ['ilgili', '##dir']
Predictions: [2, 0]
Truth: 1
Final prediction: 0 

466552 466555
Tokens: ['Curie', "##'", '##ye']
Predictions: [13, 9, 9]
Truth: 13
Final prediction: 9 

466562 466566
Tokens: ['labor', '##atu', '##var', '##ının']
Predictions: [9, 9, 9, 12]
Truth: 9
Final prediction: 9 

466575 466581
Tokens: ['ha', '##zır', '##laya', '##bile', '##ce', '##ğini']
Predictions: [17, 17, 17, 17, 17, 12]
Truth: 9
Final prediction: 17 

466586 466588
Tokens: ['onun', '##la']
Predictions: [12, 4]
Truth: 12
Final prediction: 12 

466588 466592

Predictions: [9, 9, 10]
Truth: 9
Final prediction: 9 

470402 470405
Tokens: ['bi', '##çim', '##de']
Predictions: [9, 9, 4]
Truth: 9
Final prediction: 9 

470415 470418
Tokens: ['av', '##roy', '##a']
Predictions: [9, 9, 10]
Truth: 9
Final prediction: 9 

470434 470436
Tokens: ['bank', '##anın']
Predictions: [9, 13]
Truth: 9
Final prediction: 9 

470436 470438
Tokens: ['kendi', '##lerine']
Predictions: [12, 11]
Truth: 9
Final prediction: 11 

470450 470453
Tokens: ['tek', '##lifi', '##ni']
Predictions: [9, 9, 11]
Truth: 9
Final prediction: 9 

470457 470461
Tokens: ['Bel', '##irt', '##ildi', '##ği']
Predictions: [17, 17, 17, 12]
Truth: 9
Final prediction: 17 

470464 470468
Tokens: ['EC', '##B', "##'", '##nin']
Predictions: [13, 13, 13, 11]
Truth: 13
Final prediction: 13 

470468 470474
Tokens: ['im', '##ti', '##ya', '##zı', '##nda', '##dır']
Predictions: [9, 9, 9, 9, 4, 6]
Truth: 1
Final prediction: 9 

470474 470479
Tokens: ['im', '##ti', '##ya', '##zı', '##nda']
Predictions: [9, 9, 9

474060 474064
Tokens: ['tri', '##b', '##ün', '##lerden']
Predictions: [9, 9, 9, 3]
Truth: 9
Final prediction: 9 

474065 474067
Tokens: ['kalan', '##ları']
Predictions: [9, 12]
Truth: 9
Final prediction: 9 

474069 474071
Tokens: ['etmiş', '##ti']
Predictions: [17, 12]
Truth: 5
Final prediction: 17 

474078 474081
Tokens: ['1969', "##'", '##da']
Predictions: [10, 13, 4]
Truth: 9
Final prediction: 10 

474083 474086
Tokens: ['hu', '##kuk', '##undan']
Predictions: [13, 9, 3]
Truth: 9
Final prediction: 9 

474088 474091
Tokens: ['cez', '##ası', '##nı']
Predictions: [9, 9, 13]
Truth: 9
Final prediction: 9 

474102 474107
Tokens: ['h', '##ük', '##üm', '##sü', '##z']
Predictions: [9, 9, 9, 13, 2]
Truth: 2
Final prediction: 9 

474117 474122
Tokens: ['n', '##üm', '##isma', '##tik', '##tir']
Predictions: [2, 2, 2, 2, 4]
Truth: 1
Final prediction: 2 

474130 474135
Tokens: ['kole', '##ksi', '##yon', '##cular', '##a']
Predictions: [9, 9, 9, 9, 11]
Truth: 9
Final prediction: 9 

474135 474138
Tok

492507 492509
Tokens: ['特', '##別']
Predictions: [9, 18]
Truth: 2
Final prediction: 9 

492520 492522
Tokens: ['發', '##布']
Predictions: [13, 2]
Truth: 17
Final prediction: 2 

492523 492525
Tokens: ['博', '##客']
Predictions: [9, 0]
Truth: 9
Final prediction: 0 

492526 492528
Tokens: ['寫', '##道']
Predictions: [17, 0]
Truth: 17
Final prediction: 0 

492529 492531
Tokens: ['對', '##於']
Predictions: [18, 14]
Truth: 3
Final prediction: 18 

492533 492535
Tokens: ['社', '##交']
Predictions: [9, 2]
Truth: 9
Final prediction: 9 

492538 492540
Tokens: ['跟', '##踪']
Predictions: [17, 13]
Truth: 17
Final prediction: 17 

492543 492545
Tokens: ['任', '##職']
Predictions: [9, 2]
Truth: 9
Final prediction: 9 

492559 492561
Tokens: ['不', '##同']
Predictions: [18, 0]
Truth: 2
Final prediction: 0 

492572 492574
Tokens: ['進', '##行']
Predictions: [17, 0]
Truth: 17
Final prediction: 0 

492574 492576
Tokens: ['移', '##民']
Predictions: [17, 18]
Truth: 9
Final prediction: 17 

492576 492578
Tokens: ['削', '##減']
P

496006 496008
Tokens: ['人', '##員']
Predictions: [9, 14]
Truth: 9
Final prediction: 9 

496012 496014
Tokens: ['進', '##行']
Predictions: [17, 0]
Truth: 17
Final prediction: 0 

496031 496035
Tokens: ['安', '##大', '##略', '##省']
Predictions: [10, 10, 18, 18]
Truth: 13
Final prediction: 10 

496046 496051
Tokens: ['邁', '##科', '##內', '##爾', '##里']
Predictions: [13, 0, 14, 14, 14]
Truth: 13
Final prediction: 14 

496054 496056
Tokens: ['本', '##週']
Predictions: [7, 9]
Truth: 9
Final prediction: 9 

496060 496062
Tokens: ['事', '##項']
Predictions: [9, 0]
Truth: 9
Final prediction: 0 

496074 496078
Tokens: ['安', '##大', '##略', '##省']
Predictions: [13, 13, 18, 18]
Truth: 13
Final prediction: 18 

496090 496093
Tokens: ['煽', '##動', '##性']
Predictions: [17, 0, 13]
Truth: 17
Final prediction: 0 

496094 496096
Tokens: ['言', '##論']
Predictions: [9, 13]
Truth: 9
Final prediction: 9 

496105 496107
Tokens: ['成', '##功']
Predictions: [17, 9]
Truth: 2
Final prediction: 17 

496110 496112
Tokens: ['預', '##期'

Predictions: [2, 0, 9]
Truth: 9
Final prediction: 0 

499426 499430
Tokens: ['荒', '##誕', '##不', '##經']
Predictions: [2, 2, 14, 14]
Truth: 17
Final prediction: 2 

499437 499439
Tokens: ['至', '##少']
Predictions: [4, 0]
Truth: 4
Final prediction: 0 

499446 499448
Tokens: ['一', '##直']
Predictions: [10, 0]
Truth: 4
Final prediction: 0 

499487 499489
Tokens: ['可', '##以']
Predictions: [13, 10]
Truth: 5
Final prediction: 10 

499494 499498
Tokens: ['卡', '##塔', '##拉', '##諾']
Predictions: [13, 13, 13, 14]
Truth: 13
Final prediction: 13 

499518 499521
Tokens: ['復', '##古', '##味']
Predictions: [13, 13, 0]
Truth: 2
Final prediction: 13 

499527 499529
Tokens: ['他', '##們']
Predictions: [12, 14]
Truth: 12
Final prediction: 12 

499541 499545
Tokens: ['卡', '##塔', '##拉', '##諾']
Predictions: [13, 18, 14, 14]
Truth: 13
Final prediction: 14 

499555 499557
Tokens: ['隨', '##後']
Predictions: [4, 0]
Truth: 4
Final prediction: 0 

499571 499573
Tokens: ['以', '##前']
Predictions: [5, 11]
Truth: 9
Final predi

503197 503199
Tokens: ['市', '##場']
Predictions: [13, 9]
Truth: 9
Final prediction: 9 

503199 503201
Tokens: ['齊', '##平']
Predictions: [4, 10]
Truth: 17
Final prediction: 10 

503206 503208
Tokens: ['政', '##府']
Predictions: [9, 13]
Truth: 9
Final prediction: 9 

503210 503212
Tokens: ['全', '##部']
Predictions: [12, 3]
Truth: 7
Final prediction: 3 

503226 503228
Tokens: ['受', '##到']
Predictions: [17, 13]
Truth: 17
Final prediction: 17 

503231 503233
Tokens: ['最', '##大']
Predictions: [4, 2]
Truth: 2
Final prediction: 2 

503239 503241
Tokens: ['鼎', '##力']
Predictions: [9, 2]
Truth: 4
Final prediction: 9 

503245 503247
Tokens: ['中', '##國']
Predictions: [9, 2]
Truth: 13
Final prediction: 9 

503247 503249
Tokens: ['地', '##方']
Predictions: [9, 2]
Truth: 9
Final prediction: 9 

503251 503255
Tokens: ['累', '##積', '##如', '##山']
Predictions: [17, 17, 9, 9]
Truth: 17
Final prediction: 17 

503263 503265
Tokens: ['只', '##有']
Predictions: [4, 15]
Truth: 3
Final prediction: 4 

503291 503293
Toke

507138 507140
Tokens: ['基', '##督']
Predictions: [9, 13]
Truth: 9
Final prediction: 9 

507140 507142
Tokens: ['教', '##堂']
Predictions: [9, 0]
Truth: 9
Final prediction: 0 

507142 507144
Tokens: ['通', '##常']
Predictions: [2, 9]
Truth: 4
Final prediction: 9 

507147 507149
Tokens: ['主', '##日']
Predictions: [13, 9]
Truth: 9
Final prediction: 9 

507156 507158
Tokens: ['取', '##代']
Predictions: [17, 18]
Truth: 17
Final prediction: 17 

507162 507166
Tokens: ['阿', '##爾', '##卑', '##斯']
Predictions: [18, 18, 18, 14]
Truth: 13
Final prediction: 18 

507166 507168
Tokens: ['山', '##脈']
Predictions: [9, 18]
Truth: 9
Final prediction: 9 

507179 507182
Tokens: ['飲', '##用', '##水']
Predictions: [9, 9, 18]
Truth: 17
Final prediction: 9 

507183 507186
Tokens: ['灌', '##溉', '##水']
Predictions: [9, 9, 14]
Truth: 17
Final prediction: 9 

507189 507191
Tokens: ['發', '##電']
Predictions: [17, 0]
Truth: 17
Final prediction: 0 

507201 507203
Tokens: ['佔', '##據']
Predictions: [17, 2]
Truth: 17
Final predictio

511160 511162
Tokens: ['湖', '##區']
Predictions: [9, 13]
Truth: 9
Final prediction: 9 

511164 511166
Tokens: ['深', '##深']
Predictions: [2, 0]
Truth: 4
Final prediction: 0 

511166 511168
Tokens: ['打', '##動']
Predictions: [9, 0]
Truth: 17
Final prediction: 0 

511169 511171
Tokens: ['以', '##至']
Predictions: [18, 0]
Truth: 17
Final prediction: 0 

511172 511174
Tokens: ['他', '##們']
Predictions: [12, 0]
Truth: 12
Final prediction: 0 

511175 511177
Tokens: ['第', '##一']
Predictions: [2, 10]
Truth: 2
Final prediction: 2 

511178 511180
Tokens: ['孩', '##子']
Predictions: [9, 0]
Truth: 9
Final prediction: 0 

511196 511198
Tokens: ['那', '##兒']
Predictions: [7, 2]
Truth: 12
Final prediction: 2 

511213 511215
Tokens: ['一', '##同']
Predictions: [10, 4]
Truth: 4
Final prediction: 10 

511218 511222
Tokens: ['佛', '##羅', '##倫', '##薩']
Predictions: [9, 9, 9, 14]
Truth: 13
Final prediction: 9 

511222 511225
Tokens: ['葛', '##拉', '##德']
Predictions: [13, 13, 14]
Truth: 13
Final prediction: 13 

511228 

Predictions: [13, 9]
Truth: 9
Final prediction: 9 

515051 515053
Tokens: ['未', '##能']
Predictions: [5, 2]
Truth: 5
Final prediction: 2 

515075 515077
Tokens: ['真', '##的']
Predictions: [8, 14]
Truth: 4
Final prediction: 8 

515077 515079
Tokens: ['很', '##愛']
Predictions: [4, 0]
Truth: 17
Final prediction: 0 

515094 515096
Tokens: ['單', '##曲']
Predictions: [2, 9]
Truth: 9
Final prediction: 9 

515109 515111
Tokens: ['已', '##經']
Predictions: [5, 17]
Truth: 4
Final prediction: 17 

515117 515119
Tokens: ['故', '##鄉']
Predictions: [2, 14]
Truth: 9
Final prediction: 2 

515123 515125
Tokens: ['到', '##達']
Predictions: [3, 0]
Truth: 17
Final prediction: 0 

515136 515138
Tokens: ['回', '##到']
Predictions: [17, 13]
Truth: 17
Final prediction: 17 

515138 515141
Tokens: ['洛', '##杉', '##磯']
Predictions: [13, 13, 0]
Truth: 13
Final prediction: 13 

515141 515143
Tokens: ['定', '##居']
Predictions: [9, 13]
Truth: 17
Final prediction: 9 

515144 515148
Tokens: ['一', '##心', '##一', '##意']
Predictions: 

519661 519663
Tokens: ['很', '##大']
Predictions: [4, 2]
Truth: 2
Final prediction: 2 

519663 519665
Tokens: ['改', '##進']
Predictions: [9, 0]
Truth: 17
Final prediction: 0 

519669 519672
Tokens: ['伊', '##斯', '##蘭']
Predictions: [8, 18, 13]
Truth: 13
Final prediction: 8 

519699 519701
Tokens: ['需', '##要']
Predictions: [17, 0]
Truth: 17
Final prediction: 0 

519704 519706
Tokens: ['多', '##數']
Predictions: [2, 9]
Truth: 9
Final prediction: 9 

519707 519709
Tokens: ['第', '##三']
Predictions: [9, 10]
Truth: 2
Final prediction: 9 

519775 519777
Tokens: ['發', '##展']
Predictions: [17, 9]
Truth: 9
Final prediction: 17 

519807 519811
Tokens: ['尼', '##古', '##拉', '##斯']
Predictions: [13, 13, 13, 14]
Truth: 13
Final prediction: 13 

519828 519832
Tokens: ['前', '##司', '##法', '##部']
Predictions: [4, 14, 14, 14]
Truth: 9
Final prediction: 14 

519848 519851
Tokens: ['法', '##西', '##斯']
Predictions: [9, 9, 18]
Truth: 13
Final prediction: 9 

519867 519869
Tokens: ['暴', '##徒']
Predictions: [9, 0]
Trut

Final prediction: 0 

523490 523492
Tokens: ['為', '##了']
Predictions: [5, 11]
Truth: 3
Final prediction: 11 

523492 523494
Tokens: ['參', '##加']
Predictions: [17, 9]
Truth: 17
Final prediction: 17 

523507 523509
Tokens: ['不', '##得']
Predictions: [11, 5]
Truth: 5
Final prediction: 11 

523510 523512
Tokens: ['離', '##開']
Predictions: [17, 0]
Truth: 17
Final prediction: 0 

523517 523519
Tokens: ['公', '##元']
Predictions: [10, 13]
Truth: 9
Final prediction: 10 

523532 523534
Tokens: ['之', '##後']
Predictions: [3, 9]
Truth: 3
Final prediction: 9 

523537 523539
Tokens: ['逐', '##漸']
Predictions: [17, 0]
Truth: 4
Final prediction: 0 

523544 523548
Tokens: ['嶄', '##露', '##頭', '##角']
Predictions: [2, 2, 18, 14]
Truth: 17
Final prediction: 2 

523551 523553
Tokens: ['已', '##經']
Predictions: [5, 0]
Truth: 4
Final prediction: 0 

523553 523555
Tokens: ['衰', '##落']
Predictions: [17, 13]
Truth: 17
Final prediction: 17 

523577 523580
Tokens: ['字', '##母', '##表']
Predictions: [13, 13, 14]
Truth: 9
F

In [71]:
# Fraction of positions where each final_* prediction list equals `truths`.
gold_tags = np.array(truths)
strategy_outputs = [
    ("Most voted:", final_most_voted),
    ("Logit average:", final_avg),
    ("Always first:", final_first),
    ("Random choice:", final_random),
    ("Highest probability:", final_max_prob),
    ("Equiprobable random:", final_random_equi),
]
for heading, strategy_preds in strategy_outputs:
    print(heading, (gold_tags == np.array(strategy_preds)).mean())

Most voted: 0.4800012373557707
Logit average: 0.490828100349553
Always first: 0.4210721687753271
Random choice: 0.364927150678999
Highest probability: 0.49760262319423393
Equiprobable random: 0.32393974077396603


In [72]:
# Rebuild word-level sequences: every (start, end) span in `subword_locations`
# is collapsed into one token / prediction / label entry, while the entries
# between spans are copied over unchanged.
new_tokens, new_preds, new_labels = [], [], []
prev_end = 0

for start, end in subword_locations:
    span_preds = filtered_preds[start:end]
    if len(set(span_preds)) <= 1:
        # Every piece of the span carries the same prediction
        prediction = filtered_preds[start]
    else:
        # Pieces disagree: take the argmax of the summed logits of the span
        prediction = sum(logits[start:end]).argmax()
    # Glue the pieces back together and drop the "##" continuation markers
    token = "".join(tokens[start:end]).replace("##", "")
    new_preds.extend(filtered_preds[prev_end:start] + [prediction])
    new_tokens.extend(tokens[prev_end:start] + [token])
    # The merged word keeps the label of the first piece of the span
    new_labels.extend(labels[prev_end:start] + [labels[start]])
    prev_end = end

# Everything after the last span is copied as-is
new_preds.extend(filtered_preds[prev_end:])
new_tokens.extend(tokens[prev_end:])
new_labels.extend(labels[prev_end:])

HBox(children=(FloatProgress(value=0.0, max=127717.0), HTML(value='')))




In [73]:
# Show the first 20 merged tokens next to the tag name of their label id.
for token, label in zip(new_tokens[:20], new_labels[:20]):
    print("{} {}".format(token, tagset[label]))

كتبت VERB
كوري PROPN
شولمان PROPN
, PUNCT
المساعدة NOUN
الخاصة ADJ
ل ADP
أوباما PROPN
في ADP
تدوينة NOUN
نشرت VERB
ها PRON
يوم ADV
الإثنين PROPN
: PUNCT
" PUNCT
فيما ADP
الكثير NOUN
من ADP
عمليات NOUN


In [74]:
# Show the first 30 subword pieces next to the tag name of their label id.
for token, label in zip(tokens[:30], labels[:30]):
    print("{} {}".format(token, tagset[label]))

كتب VERB
##ت VERB
ك PROPN
##وري PROPN
ش PROPN
##ول PROPN
##مان PROPN
, PUNCT
ال NOUN
##مساعدة NOUN
الخاصة ADJ
ل ADP
أو PROPN
##با PROPN
##ما PROPN
في ADP
ت NOUN
##دو NOUN
##ينة NOUN
نشر VERB
##ت VERB
ها PRON
يوم ADV
ال PROPN
##إ PROPN
##ثنين PROPN
: PUNCT
" PUNCT
فيما ADP
الكثير NOUN


In [75]:
# Share of merged positions whose prediction equals the label.
word_gold = np.array(new_labels)
word_pred = np.array(new_preds)
(word_gold == word_pred).mean()

0.6867996536918918

## Old

In [3]:
#lang_path = os.path.join("../data/ud/", "ar")
#pos = open(glob.glob(lang_path + "/*-test.conllu")[0], "r", encoding="utf-8").read()
# Read the Spanish PUD test file. The context manager closes the handle, which
# the previous bare open(...).read() never did.
with open("../es_pud-ud-test.conllu", "r", encoding="utf-8") as conllu_file:
    # [1:] drops the first character of the file — presumably a BOM or other
    # stray leading character; TODO confirm it is still needed.
    pos = conllu_file.read()[1:]
# NOTE(review): `parse` is not among the imports visible at the top of the notebook — confirm where it comes from.
test_sentences = parse(pos)
test_dataset = convert_examples_to_tf_dataset(examples=test_sentences, tokenizer=tokenizer, tagset=tagset, max_length=512)
# One pass over the data in shuffled batches of 32
test_dataset = test_dataset.shuffle(10000).batch(32).repeat(1)

In [25]:
# Evaluate on the test dataset; the recorded output is a two-element list, matching
# the loss and the single `ignore_acc` metric passed to model.compile() in the setup cell.
model.evaluate(test_dataset)



[0.015721691772341728, 0.9975742101669312]

In [33]:
pos_example = """1	Aunque	_	ADP	IN	_	3	mark	_	_
2	no	_	ADV	RB	Polarity=Neg	3	advmod	_	_
3	haya	haber	VERB	VBC	Aspect=Imp|Mood=Sub|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin|Voice=Act	23	advcl	_	_
4	precedentes	_	NOUN	NN	Gender=Masc|Number=Plur	3	obj	_	_
5	para	_	ADP	IN	_	8	case	_	_
6	la	el	DET	DT	Definite=Def|Gender=Fem|Number=Sing|PronType=Art	8	det	_	_
7	mayor	_	ADJ	JJR	Degree=Cmp|Gender=Fem|Number=Sing	8	amod	_	_
8	parte	_	NOUN	NN	Gender=Fem|Number=Sing	3	obl	_	_
9	de	_	ADP	IN	_	11	case	_	_
10	la	el	DET	DT	Definite=Def|Gender=Fem|Number=Sing|PronType=Art	11	det	_	_
11	transición	_	NOUN	NN	Gender=Fem|Number=Sing	8	nmod	_	_
12	digital	_	ADJ	JJ	Gender=Fem|Number=Sing	11	amod	_	_
13	en	_	ADP	IN	_	14	case	_	_
14	Estados	_	NOUN	NN	Gender=Masc|Number=Plur	3	obl	_	Proper=True
15	Unidos	_	ADJ	JJ	Gender=Masc|Number=Plur	14	amod	_	SpaceAfter=No|Proper=True
16	,	_	PUNCT	,	_	3	punct	_	_
17	la	el	DET	DT	Definite=Def|Gender=Fem|Number=Sing|PronType=Art	18	det	_	_
18	transición	_	NOUN	NN	Gender=Fem|Number=Sing	23	nsubj	_	_
19	de	_	ADP	IN	_	20	case	_	_
20	poder	_	NOUN	NN	Gender=Masc|Number=Sing	18	nmod	_	_
21	pacífica	_	ADJ	JJ	Gender=Fem|Number=Sing	18	amod	_	_
22	es	_	AUX	VBC	Aspect=Imp|Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin|Voice=Act	23	cop	_	_
23	habitual	_	ADJ	JJ	Gender=Fem|Number=Sing	0	root	_	SpaceAfter=No
24	,	_	PUNCT	,	_	25	punct	_	_
25	escribió	_	VERB	VBC	Aspect=Perf|Mood=Ind|Number=Sing|Person=3|Tense=Past|VerbForm=Fin|Voice=Act	23	parataxis	_	_
26	el	el	DET	DT	Definite=Def|Gender=Masc|Number=Sing|PronType=Art	27	det	_	_
27	lunes	_	NOUN	NN	Gender=Masc|Number=Sing	25	obl:tmod	_	_
28	en	_	ADP	IN	_	30	case	_	_
29	una	uno	DET	DT	Definite=Ind|Gender=Fem|Number=Sing|PronType=Art	30	det	_	_
30	entrada	_	NOUN	NN	Gender=Fem|Number=Sing	25	obl	_	_
31	de	_	ADP	IN	_	32	case	_	_
32	blog	_	NOUN	NN	Gender=Masc|Number=Sing	30	nmod	_	_
33	Kori	_	PROPN	NNP	Gender=Fem|Number=Sing	25	nsubj	_	_
34	Schulman	_	PROPN	NNP	Gender=Fem|Number=Sing	33	flat:name	_	SpaceAfter=No
35	,	_	PUNCT	,	_	36	punct	_	_
36	Asistente	_	NOUN	NN	Gender=Fem|Number=Sing	33	appos	_	_
37	Especial	_	ADJ	JJ	Gender=Fem|Number=Sing	36	amod	_	_
38	de	de	ADP	INDT	_	40	case	_	_
39	el	el	DET	_	Definite=Def|Gender=Masc|Number=Sing|PronType=Art	40	det	_	_
40	presidente	_	NOUN	NN	Gender=Masc|Number=Sing	36	nmod	_	_
41	Obama	_	PROPN	NNP	Gender=Masc|Number=Sing	40	appos	_	SpaceAfter=No
42	.	_	PUNCT	.	_	23	punct	_	_"""

In [34]:
# Split every line of the example once on tabs, then pick the columns:
# index 1 is used as the token and index 3 as the gold tag.
example_rows = [line.split("\t") for line in pos_example.split("\n")]
tokens = [row[1] for row in example_rows]
true_labels = [row[3] for row in example_rows]

In [37]:
# Tag the example sentence and print every subword with its predicted and gold tag
# (mismatches in red), followed by the subword-level accuracy.
#
# subword_tokenize returns the subword pieces together with one gold label per
# piece, so the gold labels are taken from there. The earlier approach advanced a
# word counter by inspecting the decoded text and also advanced it for continuation
# pieces ending in "," or ".", which shifts every later gold label by one.
subword_tokens, subword_labels, _ = tokenizer.subword_tokenize(tokens, true_labels)
ids = tokenizer.convert_tokens_to_ids(subword_tokens)
mask = [1] * len(ids)
types = [0] * len(ids)
result = {"tokens": [], "predicted_labels": [], "true_labels": []}
print("{:<25}{:<20}{:<20}".format("Token", "Predicted label", "True label"), "\n")
# Single-sentence batch; argmax over the last axis gives one tag id per subword
predicted_ids = model({"input_ids": tf.constant([ids]),
                       "attention_mask": tf.constant([mask]),
                       "token_type_ids": tf.constant([types])})[0].numpy().argmax(axis=-1)[0]
for token, label, true_label in zip(ids, predicted_ids, subword_labels):
    decoded_token = tokenizer.decode(token)
    result["tokens"].append(decoded_token)
    result["predicted_labels"].append(tagset[label])
    result["true_labels"].append(true_label)
    if tagset[label] == true_label:
        print("{:<25}{:<20}{:<20}".format(decoded_token, tagset[label], true_label))
    else:
        # ANSI red for wrong predictions
        print("\x1b[31m{:<25}{:<20}{:<20}\x1b[0m".format(decoded_token, tagset[label], true_label))
print("\nAccuracy:", str(np.mean(np.array(result["predicted_labels"]) == np.array(result["true_labels"])) * 100)[:5] + "%")

Token                    Predicted label     True label           

[31mA u n q u e              SCONJ               ADP                 [0m
[31mn o                      PART                ADV                 [0m
h a y a                  VERB                VERB                
p r e c e d e n t e      NOUN                NOUN                
# # s                    NOUN                NOUN                
p a r a                  ADP                 ADP                 
l a                      DET                 DET                 
m a y o r                ADJ                 ADJ                 
p a r t e                NOUN                NOUN                
d e                      ADP                 ADP                 
l a                      DET                 DET                 
t r a n s i c i ó n      NOUN                NOUN                
d i g i t a l            ADJ                 ADJ                 
e n                      ADP                 ADP        

In [207]:
# Weighted average over 512 positions: 0.88 for the entries in result["tokens"],
# 1.0 for the remaining positions. Presumably this checks how much padding to
# max_length=512 inflates the reported accuracy — TODO confirm where 0.88 comes from.
n_result_tokens = len(result["tokens"])
(n_result_tokens * 0.88 + (512 - n_result_tokens)) / 512

0.98828125

In [184]:
# Load the Spanish PUD test file with pyconll, convert it to batches of 32
# (no shuffling) and display the second value returned by model.evaluate.
test_sentences = pyconll.load_from_file("../es_pud-ud-test.conllu")
test_dataset = (
    convert_examples_to_tf_dataset(examples=test_sentences, tokenizer=tokenizer, tagset=tagset, max_length=512)
    .batch(32)
    .repeat(1)
)
es_pud_scores = model.evaluate(test_dataset)
es_pud_scores[1]



0.98828125

In [174]:
# Decode the first 60 input ids of row 0 of the first element yielded by the dataset.
tokenizer.decode(test_dataset.as_numpy_iterator().next()[0]["input_ids"][0, :60])

'Aunque no haya precedentes para la mayor parte de la transición digital en Estados Unidos, la transición de poder pacífica es habitual, escribió el lunes en una entrada de blog Kori Schulman, Asistente Especial de el presidente Obama. [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD]'

In [182]:
# Compare the tag names of the first 50 label ids of row 0 (first yielded element)
# with the gold labels collected in result["true_labels"].
first_element_labels = test_dataset.as_numpy_iterator().next()[1]
dataset_tag_names = np.array([tagset[index] for index in first_element_labels[0, :50]])
dataset_tag_names == np.array(result["true_labels"])

array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True])

In [41]:
def read_conll(input_file):
    """Read sentence ids, tokens and UPOS tags from a CoNLL-U file.

    Args:
        input_file: Path of the CoNLL-U file; it is opened as UTF-8.

    Returns:
        A tuple ``(ids, texts, tags)``. ``ids`` holds the last
        whitespace-separated item of every ``# sent_id =`` comment line,
        ``texts`` holds one list of tokens (column 2) per sentence and
        ``tags`` one list of UPOS labels (column 4) per sentence.
    """
    ids = []
    texts = []
    tags = []
    # Accumulators for the sentence currently being read
    text = []
    tag = []
    # Last sentence id seen; stays None until the first "# sent_id =" line
    idx = None
    # The context manager guarantees the file handle is closed
    with open(input_file, encoding="utf-8") as conll_file:
        for line in conll_file:
            if line.startswith("# sent_id ="):
                idx = line.strip().split()[-1]
                ids.append(idx)
            elif line.startswith("#"):
                # Any other comment line is ignored
                continue
            elif line.strip() == "":
                # A blank line closes the current sentence
                texts.append(text)
                tags.append(tag)
                text, tag = [], []
            else:
                # CoNLL-U columns are tab-separated and FORM/LEMMA may contain
                # spaces, so split on tabs; fall back to whitespace splitting
                # for lines that are not tab-delimited.
                columns = line.rstrip("\r\n").split("\t")
                if len(columns) < 4:
                    columns = line.strip().split()
                try:
                    token = columns[1]  # the token
                    label = columns[3]  # the UD POS Tag label
                except IndexError:
                    # Malformed line: report the sentence it belongs to and skip it
                    print(idx)
                    continue
                text.append(token)
                tag.append(label)
    # Keep the last sentence when the file does not end with a blank line
    if text:
        texts.append(text)
        tags.append(tag)
    return ids, texts, tags

In [42]:
# Parse the first file matching *-test.conllu in the Finnish UD directory.
fi_test_files = glob.glob("../data/ud/fi" + "/*-test.conllu")
ids, texts, tags = read_conll(fi_test_files[0])

In [45]:
# Position of the first sentence id equal to "n01112014" (argmax of the boolean mask).
sent_id_hits = np.array(ids) == "n01112014"
sent_id_hits.argmax()

277

In [159]:
# Print the tag list at index 277, the position reported for sent_id "n01112014".
# NOTE(review): the recorded output is a NameError — `tags` was not defined in the
# kernel when this cell last ran, so it depends on the read_conll cell being run first.
print(tags[277])

NameError: name 'tags' is not defined

In [160]:
# Print the token list at index 277, the position reported for sent_id "n01112014".
# NOTE(review): the recorded output is a NameError — `texts` was not defined in the
# kernel when this cell last ran, so it depends on the read_conll cell being run first.
print(texts[277])

NameError: name 'texts' is not defined

In [32]:
# Index of the first entry of `labels` that equals None. `== None` is deliberate:
# it is evaluated element-wise on the array, which `is None` would not do.
missing_label_mask = np.array(labels) == None
missing_label_mask.argmax()

16

In [34]:
# Token at index 16, the position reported by the None-label lookup.
tokens[16]

'ellei'

In [32]:
# Evaluate the model on the first *-test.conllu file of every entry in `path`
# and store the second value returned by model.evaluate per entry.
# NOTE(review): the recorded output of this cell ends in KeyError: 'tokens'.
path = "../data/ud/"
batch_size = 32
pos_eval = {}
for directory in tqdm(os.listdir(path)):
    lang_path = os.path.join(path, directory)
    test_file = glob.glob(lang_path + "/*-test.conllu")[0]
    test_sentences = pyconll.load_from_file(test_file)
    test_dataset = (
        convert_examples_to_tf_dataset(examples=test_sentences, tokenizer=tokenizer, tagset=tagset, max_length=512)
        .shuffle(10000)
        .batch(batch_size)
        .repeat(1)
    )
    lang_scores = model.evaluate(test_dataset)
    pos_eval[directory] = lang_scores[1]

HBox(children=(FloatProgress(value=0.0, max=15.0), HTML(value='')))




KeyError: 'tokens'

In [11]:
# Build the (unshuffled) batched dataset for the first Chinese *-test.conllu file.
zh_test_file = glob.glob("../data/ud/zh" + "/*-test.conllu")[0]
test_sentences = pyconll.load_from_file(zh_test_file)
test_dataset = (
    convert_examples_to_tf_dataset(examples=test_sentences, tokenizer=tokenizer, tagset=tagset, max_length=512)
    .batch(32)
    .repeat(1)
)
# Evaluation is left disabled in this cell:
#model.evaluate(test_dataset)[1]

In [132]:
# Decode, one id at a time, the first 70 input ids of row 0 of the first yielded element.
first_zh_inputs = test_dataset.as_numpy_iterator().next()[0]
leading_zh_ids = first_zh_inputs["input_ids"][0][:70]
print([tokenizer.decode(int(token)) for token in leading_zh_ids])

['"', '雖', '# # 然', '美', '# # 國', '的', '許', '# # 多', '數', '# # 字', '# # 化', '轉', '# # 型', '都', '# # 是', '史', '# # 無', '# # 前', '# # 例', '的', '，', '但', '權', '# # 力', '的', '和', '# # 平', '轉', '# # 移', '卻', '存', '# # 在', '先', '# # 例', '，', '[ U N K ]', '奧', '# # 巴', '# # 馬', '的', '特', '# # 別', '助', '# # 理', '科', '# # 瑞', '·', '舒', '# # 爾', '# # 曼', '在', '周', '一', '發', '# # 布', '的', '博', '# # 客', '中', '寫', '# # 道', '。', '[ P A D ]', '[ P A D ]', '[ P A D ]', '[ P A D ]', '[ P A D ]', '[ P A D ]', '[ P A D ]', '[ P A D ]']


In [18]:
# Inspect the raw return value of subword_tokenize for the first sentence.
# [3]*100 is a dummy label list; the recorded output shows three returned sequences.
first_sentence_forms = [token.form for token in test_sentences[0]]
dummy_labels = [3]*100
print(tokenizer.subword_tokenize(first_sentence_forms, dummy_labels))

(['"', '雖', '##然', '美', '##國', '的', '許', '##多', '數', '##字', '##化', '轉', '##型', '都', '##是', '史', '##無', '##前', '##例', '的', '，', '但', '權', '##力', '的', '和', '##平', '轉', '##移', '卻', '存', '##在', '先', '##例', '，', '[UNK]', '奧', '##巴', '##馬', '的', '特', '##別', '助', '##理', '科', '##瑞', '·', '舒', '##爾', '##曼', '在', '周', '一', '發', '##布', '的', '博', '##客', '中', '寫', '##道', '。'], [3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3], [0, 1, 1, 2, 2, 3, 4, 4, 5, 5, 5, 6, 6, 7, 7, 8, 8, 8, 8, 9, 10, 11, 12, 12, 13, 14, 14, 15, 15, 16, 17, 17, 18, 18, 19, 20, 21, 21, 21, 22, 23, 23, 24, 24, 25, 25, 26, 27, 27, 27, 28, 29, 30, 31, 31, 32, 33, 33, 34, 35, 35, 36])


In [19]:
# Vocabulary ids of the subword pieces of the first sentence (dummy labels [3]*100).
zh_subword_pieces = tokenizer.subword_tokenize([token.form for token in test_sentences[0]], [3]*100)[0]
print(tokenizer.convert_tokens_to_ids(zh_subword_pieces))

[107, 8282, 115088, 6417, 112786, 5718, 7185, 112957, 4310, 113120, 112415, 7632, 112835, 7838, 114146, 2759, 115084, 112334, 112046, 5718, 10064, 2243, 4769, 112360, 5718, 2833, 113407, 7632, 115731, 2702, 3355, 112797, 2431, 112046, 10064, 100, 3229, 113360, 118276, 5718, 5410, 112316, 2602, 115281, 5954, 115314, 217, 6639, 115159, 114220, 3031, 2822, 2072, 5714, 113367, 5718, 2684, 113162, 2104, 3435, 117527, 1882]


In [24]:
# Round trip: subword pieces -> ids -> each id decoded on its own.
zh_roundtrip_ids = tokenizer.convert_tokens_to_ids(
    tokenizer.subword_tokenize([token.form for token in test_sentences[0]], [3]*100)[0]
)
print([tokenizer.decode(token) for token in zh_roundtrip_ids])

['"', '雖', '# # 然', '美', '# # 國', '的', '許', '# # 多', '數', '# # 字', '# # 化', '轉', '# # 型', '都', '# # 是', '史', '# # 無', '# # 前', '# # 例', '的', '，', '但', '權', '# # 力', '的', '和', '# # 平', '轉', '# # 移', '卻', '存', '# # 在', '先', '# # 例', '，', '[ U N K ]', '奧', '# # 巴', '# # 馬', '的', '特', '# # 別', '助', '# # 理', '科', '# # 瑞', '·', '舒', '# # 爾', '# # 曼', '在', '周', '一', '發', '# # 布', '的', '博', '# # 客', '中', '寫', '# # 道', '。']


In [14]:
# Decode a single id (position 2 of row 0 of the first yielded element) on its own;
# the recorded result is '# # 然', i.e. the piece comes back with spaces inserted.
tokenizer.decode(int(test_dataset.as_numpy_iterator().next()[0]["input_ids"][0][:70][2]))

'# # 然'

In [134]:
# Decode the first 70 ids of row 0 in one call; in the recorded output the
# continuation pieces are merged into words instead of being shown as '# # x'.
tokenizer.decode(test_dataset.as_numpy_iterator().next()[0]["input_ids"][0][:70])

'" 雖然 美國 的 許多 數字化 轉型 都是 史無前例 的 ， 但 權力 的 和平 轉移 卻 存在 先例 ， [UNK] 奧巴馬 的 特別 助理 科瑞 · 舒爾曼 在 周 一 發布 的 博客 中 寫道 。 [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD]'

In [31]:
# Tag the first test sentence and print every subword with its predicted and
# gold tag (mismatches in red), followed by the subword-level accuracy.
word_tags = [token.upos for token in test_sentences[0]]
word_forms = [token.form for token in test_sentences[0]]
# subword_tokenize hands back the pieces plus one gold label per piece
tokens, true_labels, _ = tokenizer.subword_tokenize(word_forms, word_tags)
ids = tokenizer.convert_tokens_to_ids(tokens)

result = {"tokens": [], "predicted_labels": [], "true_labels": []}

row_template = "{:<25}{:<20}{:<20}"
mismatch_template = "\x1b[31m{:<25}{:<20}{:<20}\x1b[0m"  # same row wrapped in ANSI red
print(row_template.format("Token", "Predicted label", "True label"), "\n")
# Single-sentence batch; argmax over the last axis gives one tag id per subword
predicted_ids = model(tf.constant([ids]))[0].numpy().argmax(axis=-1)[0]
for token, label, true_label in zip(ids, predicted_ids, true_labels):
    decoded_token = tokenizer.decode(token)
    predicted_tag = tagset[label]
    result["tokens"].append(decoded_token)
    result["predicted_labels"].append(predicted_tag)
    result["true_labels"].append(true_label)
    template = row_template if predicted_tag == true_label else mismatch_template
    print(template.format(decoded_token, predicted_tag, true_label))
subword_accuracy = np.mean(np.array(result["predicted_labels"]) == np.array(result["true_labels"]))
print("\nAccuracy:", str(subword_accuracy * 100)[:5] + "%")

Token                    Predicted label     True label           

"                        PUNCT               PUNCT               
[31m雖                        INTJ                SCONJ               [0m
[31m# # 然                    PART                SCONJ               [0m
美                        PROPN               PROPN               
# # 國                    PROPN               PROPN               
的                        PART                PART                
[31m許                        NOUN                NUM                 [0m
# # 多                    NUM                 NUM                 
數                        NOUN                NOUN                
# # 字                    NOUN                NOUN                
# # 化                    NOUN                NOUN                
轉                        NOUN                NOUN                
# # 型                    NOUN                NOUN                
[31m都                        AUX              

In [28]:
# Tag sentence "n01112014" of the Finnish PUD test file and print every subword
# with its predicted and gold tag (mismatches in red), followed by the accuracy.
path = os.path.join("../data/ud/", "fi")
test_data = read_conll("../data/ud/fi/fi_pud-ud-test.conllu")
# read_conll returns (ids, texts, tags); locate the sentence by its id
index = np.where(np.array(test_data[0]) == "n01112014")[0][0]
tokens = test_data[1][index]
true_labels = test_data[2][index]
# subword_tokenize returns the subword pieces together with one gold label per
# piece, so the gold labels are taken from there. The earlier approach advanced a
# word counter by inspecting the decoded text and also advanced it for continuation
# pieces ending in "," or ".", which shifts every later gold label by one.
subword_tokens, subword_labels, _ = tokenizer.subword_tokenize(tokens, true_labels)
ids = tokenizer.convert_tokens_to_ids(subword_tokens)
mask = [1] * len(ids)
types = [0] * len(ids)
result = {"tokens": [], "predicted_labels": [], "true_labels": []}
print("{:<25}{:<20}{:<20}".format("Token", "Predicted label", "True label"), "\n")
# Single-sentence batch; argmax over the last axis gives one tag id per subword
predicted_ids = model({"input_ids": tf.constant([ids]),
                       "attention_mask": tf.constant([mask]),
                       "token_type_ids": tf.constant([types])})[0].numpy().argmax(axis=-1)[0]
for token, label, true_label in zip(ids, predicted_ids, subword_labels):
    decoded_token = tokenizer.decode(token)
    result["tokens"].append(decoded_token)
    result["predicted_labels"].append(tagset[label])
    result["true_labels"].append(true_label)
    if tagset[label] == true_label:
        print("{:<25}{:<20}{:<20}".format(decoded_token, tagset[label], true_label))
    else:
        # ANSI red for wrong predictions
        print("\x1b[31m{:<25}{:<20}{:<20}\x1b[0m".format(decoded_token, tagset[label], true_label))
print("\nAccuracy:", str(np.mean(np.array(result["predicted_labels"]) == np.array(result["true_labels"])) * 100)[:5] + "%")

Token                    Predicted label     True label           

T a m                    ADJ                 ADJ                 
# # m i k                ADJ                 ADJ                 
# # u u                  ADJ                 ADJ                 
# # l l i s              ADJ                 ADJ                 
# # e s s a              ADJ                 ADJ                 
t                        ADJ                 ADJ                 
# # ä r k e              ADJ                 ADJ                 
# # ä s s ä              ADJ                 ADJ                 
s e l v                  NOUN                NOUN                
# # i t y                NOUN                NOUN                
# # k s e s              NOUN                NOUN                
[31m# # s ä                  PRON                NOUN                [0m
t o                      VERB                VERB                
# # d e t t i i n        VERB                VERB                