# BERT EVALUATION

### IMPORTS, VARIABLES

In [1]:
import conllu
import sklearn.metrics
import pandas as pd
import matplotlib.pyplot as plt
import time

pd.set_option('display.max_rows', None)

In [2]:
xpos_predictions = './bert/polXPOS-model/test_predictions.txt'
upos_predictions = './bert/polUPOS-model/test_predictions.txt'

xpos_standard = './bert/hist_test_XPOS/test.txt'
upos_standard = './bert/hist_test_UPOS/test.txt'

labels_xpos = './bert/data_XPOS/labels.txt'
labels_upos = './bert/data_UPOS/labels.txt'

In [3]:
from functions import get_measures

### FUNCTIONS

In [4]:
def get_labels(filename: str):
    with open(filename) as f:
        labels = f.readlines()[1:]
        labels = [x.strip() for x in labels]
        
    return labels

In [5]:
def get_tags_and_tokens(filename: str):
    with open(filename) as f:
        tags = f.readlines()
        tokens = [x.strip().split()[0] for x in tags if len(x.strip()) > 0]
        tags = [x.strip().split()[1] for x in tags if len(x.strip()) > 0]
        
    return tokens, tags

In [6]:
def get_comparison(standard: list, predictions: list, tokens: list):
    '''A function that calculates and prints out the accuracy of the lemmatization.
    
    Args:
        standard (list): A list of lists of gold standard lemmas.
        predictions (list): A list of lists of predicted lemmas.
    
    Returns:
        A Pandas dataframe containing the mismatched lemmas.
    '''
    
    problematic = []
    for i, ann in enumerate(predictions):
        if standard[i] != ann:
            problematic.append((tokens[i], standard[i], predictions[i]))
            
    problematic_frame = pd.DataFrame(problematic, columns=['Token', 'Gold Standard', 'Prediction'])
    
    return problematic_frame

### EXECUTION

In [7]:
labels_upos_list = get_labels(labels_upos)
labels_xpos_list = get_labels(labels_xpos)

In [8]:
upos_token_list, upos_standard_list = get_tags_and_tokens(upos_standard)
xpos_token_list, xpos_standard_list = get_tags_and_tokens(xpos_standard)

In [9]:
_, upos_predictions_list = get_tags_and_tokens(upos_predictions)
_, xpos_predictions_list = get_tags_and_tokens(xpos_predictions)

In [10]:
# needed for replacing '_'s
for i,item in enumerate(xpos_standard_list):
    if item == None:
        xpos_standard_list[i] = '_'

In [11]:
get_measures(xpos_standard_list, xpos_predictions_list, labels=labels_xpos_list)

MEASURES:
Accuracy: 85.51%
Matthew's Correlation Coefficient: 85.04%

MEASURES PER CLASS:
Precision:
	adj:pl:acc:f:com: 0.00%
	adj:pl:acc:f:pos: 0.00%
	adj:pl:acc:f:sup: 0.00%
	adj:pl:acc:m1:com: 0.00%
	adj:pl:acc:m1:pos: 0.00%
	adj:pl:acc:m1:sup: 0.00%
	adj:pl:acc:m2:pos: 0.00%
	adj:pl:acc:m2:sup: 0.00%
	adj:pl:acc:m3:com: 0.00%
	adj:pl:acc:m3:pos: 75.00%
	adj:pl:acc:m3:sup: 0.00%
	adj:pl:acc:n:com: 0.00%
	adj:pl:acc:n:pos: 0.00%
	adj:pl:acc:n:sup: 0.00%
	adj:pl:dat:f:com: 0.00%
	adj:pl:dat:f:pos: 0.00%
	adj:pl:dat:m1:com: 0.00%
	adj:pl:dat:m1:pos: 0.00%
	adj:pl:dat:m1:sup: 0.00%
	adj:pl:dat:m2:pos: 0.00%
	adj:pl:dat:m3:com: 0.00%
	adj:pl:dat:m3:pos: 0.00%
	adj:pl:dat:n:pos: 0.00%
	adj:pl:dat:n:sup: 0.00%
	adj:pl:gen:f:com: 0.00%
	adj:pl:gen:f:pos: 100.00%
	adj:pl:gen:f:sup: 0.00%
	adj:pl:gen:m1:com: 0.00%
	adj:pl:gen:m1:pos: 44.44%
	adj:pl:gen:m1:sup: 0.00%
	adj:pl:gen:m2:pos: 0.00%
	adj:pl:gen:m2:sup: 0.00%
	adj:pl:gen:m3:com: 0.00%
	adj:pl:gen:m3:pos: 50.00%
	adj:pl:gen:m3:sup: 0.0

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [12]:
get_measures(upos_standard_list, upos_predictions_list, labels_upos_list)

MEASURES:
Accuracy: 93.20%
Matthew's Correlation Coefficient: 92.29%

MEASURES PER CLASS:
Precision:
	ADJ: 90.29%
	ADP: 99.24%
	ADV: 88.99%
	AUX: 88.66%
	CCONJ: 98.47%
	DET: 92.31%
	INTJ: 0.00%
	NOUN: 94.31%
	NUM: 98.21%
	PART: 78.32%
	PRON: 93.71%
	PROPN: 77.29%
	PUNCT: 100.00%
	SCONJ: 90.24%
	SYM: 0.00%
	VERB: 94.06%
	X: 86.54%
Recall:
	ADJ: 90.39%
	ADP: 98.74%
	ADV: 88.20%
	AUX: 83.07%
	CCONJ: 94.83%
	DET: 82.66%
	INTJ: 0.00%
	NOUN: 94.81%
	NUM: 83.33%
	PART: 85.51%
	PRON: 90.41%
	PROPN: 87.75%
	PUNCT: 100.00%
	SCONJ: 92.50%
	SYM: 0.00%
	VERB: 95.06%
	X: 63.38%



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [13]:
print(upos_token_list)

['Dziad', 'mój', 'Melchior', 'urodzony', 'roku', '1741', 'we', 'wsi', 'Godziszewo', 'parafii', 'Rypnin', 'syn', 'komornika', 'ziemskiego', 'Jana', 'Komornika', 'ziemi', 'Dobrrzyńskiej', '(', 'a', 'nie', 'Antoniego', '–', 'Antoni', 'był', 'starszym', 'bratem', 'ś', '.', 'p', '.', 'Melchiora', '–', 'i', 'nie', 'rozumiem', 'skąd', 'się', 'wzięła', 'ta', 'myłka', ',', 'która', 'później', 'wdokumentach', 'się', 'powtarza', ')', 'i', 'matki', 'Ewy', 'z', 'Pinińskich', 'właścicieli', 'Dóbr', 'Strużewo', 'z', 'adlinencjami', 'Puszczanki', ',', 'Dąbrowy', 'części', ',', 'Ronantowizna', ',', 'Żółtowizna', ',', 'Będowszczyazna', ',', 'Kmińszczyzna', ',', 'Bęklowizna', ',', 'Ruszkowizna', '.', 'A', 'że', 'śp', 'Dziad', 'mój', 'był', 'najmłodszy', 'i', 'piąty', 'z', 'pomiędzy', 'Rodzeństwa', '–', 'odstąpił', 'swoją', 'część', 'najstarszemu', 'bratu', 'swojemu', 'Mateuszowi', 'komornikowi', 'ziemi', 'Dobrzyńskiej', 'za', 'ośm', 'tysięcy', 'tynfów', '.', 'Byli', 'jeszcze', 'bracia', 'Wawrzyniec', ','

In [14]:
comparison_upos = get_comparison(upos_standard_list, upos_predictions_list, upos_token_list)
comparison_upos.to_excel('../data/mistakes/bert_UPOS_mistakes.xlsx')

In [15]:
comparison_xpos = get_comparison(xpos_standard_list, xpos_predictions_list, xpos_token_list)
comparison_xpos.to_excel('../data/mistakes/bert_XPOS_mistakes.xlsx')

In [16]:
comparison_upos

Unnamed: 0,Token,Gold Standard,Prediction
0,Melchior,PROPN,NOUN
1,nie,PART,CCONJ
2,wdokumentach,NOUN,ADV
3,Ronantowizna,PROPN,NOUN
4,Żółtowizna,PROPN,NOUN
5,Kmińszczyzna,PROPN,NOUN
6,Dobrzyńskiej,PROPN,ADJ
7,jedną,NUM,ADJ
8,było,AUX,VERB
9,było,AUX,VERB


In [17]:
comparison_xpos

Unnamed: 0,Token,Gold Standard,Prediction
0,urodzony,adj:sg:nom:m1:pos,ppas:sg:nom:m1:perf:aff
1,parafii,subst:sg:loc:f,subst:sg:gen:f
2,Rypnin,subst:sg:nom:m3,subst:sg:nom:m1
3,nie,part,conj
4,starszym,adj:sg:inst:m1:com,adj:sg:inst:m1:pos
5,p,brev:npun,brev:pun
6,wdokumentach,subst:pl:loc:m3,adj:pl:nom:m1:pos
7,adlinencjami,subst:pl:inst:f,subst:pl:inst:n:ncol
8,Ronantowizna,subst:sg:nom:f,subst:sg:gen:n:ncol
9,Żółtowizna,subst:sg:nom:f,subst:sg:inst:f
