# BERT EVALUATION

### IMPORTS, VARIABLES

In [1]:
import conllu
import sklearn.metrics
import pandas as pd
import matplotlib.pyplot as plt
import time

# Lift the row limit so pandas displays every row instead of truncating long frames.
pd.set_option('display.max_rows', None)

In [2]:
# Annotated CoNLL-U files that supply the reference ("standard") tags.
file_3k = '../data/memoirs_3k_corrected.conllu'
file_10k = '../data/memoirs_10k_corrected.conllu'

# XPOS tagger artefacts: predictions file and label inventory.
xpos_predictions = './bert/polXPOS-model/test_predictions.txt'
labels_xpos = './bert/data_XPOS/labels.txt'

# UPOS tagger artefacts: predictions file and label inventory.
upos_predictions = './bert/polUPOS-model/test_predictions.txt'
labels_upos = './bert/data_UPOS/labels.txt'

In [3]:
from functions import extract_conllu_data, get_measures

### FUNCTIONS

In [4]:
def get_labels(filename: str, encoding: str = 'utf-8') -> list:
    """Read a label inventory from a text file holding one label per line.

    The first line of the file is skipped; every remaining line is returned
    with surrounding whitespace (including the trailing newline) stripped.

    Args:
        filename: Path to the labels file.
        encoding: Text encoding used to decode the file. Defaults to UTF-8 so
            the result does not depend on the platform's locale encoding.

    Returns:
        The stripped labels in file order, excluding the first line. An empty
        or single-line file yields an empty list.
    """
    with open(filename, encoding=encoding) as f:
        # Drop the first line. NOTE(review): presumably a header or placeholder
        # entry that must not be scored -- confirm against the labels file.
        next(f, None)
        return [line.strip() for line in f]

In [5]:
def get_preds(filename: str, encoding: str = 'utf-8') -> list:
    """Read predicted tags from a whitespace-separated predictions file.

    Blank (whitespace-only) lines are skipped. Every other line is split on
    whitespace and its second field is taken as the predicted tag.

    Args:
        filename: Path to the predictions file.
        encoding: Text encoding used to decode the file. Defaults to UTF-8 so
            reading does not depend on the platform's locale encoding.

    Returns:
        The second field of every non-blank line, in file order.

    Raises:
        IndexError: If a non-blank line contains fewer than two fields.
    """
    with open(filename, encoding=encoding) as f:
        # str.split() with no arguments ignores leading/trailing whitespace and
        # returns [] for a blank line, so no separate strip() is needed.
        rows = (line.split() for line in f)
        return [fields[1] for fields in rows if fields]

### EXECUTION

In [6]:
# Load the label inventory of each tagset (get_labels skips the first line of the file).
labels_upos_list = get_labels(labels_upos)
labels_xpos_list = get_labels(labels_xpos)

In [7]:
# Reference tags taken from the annotated CoNLL-U files. Only element [1] of
# the helper's return value is kept (presumably the flat list of tags when
# sentences=False -- verify against functions.extract_conllu_data).
# NOTE(review): UPOS is read from the 10k file but XPOS from the 3k file --
# confirm this asymmetry is intended.
upos_standard_list = extract_conllu_data(file_10k,'upos', sentences=False)[1]
xpos_standard_list = extract_conllu_data(file_3k, 'xpos', sentences=False)[1]

In [8]:
# Load the predicted tags (second column) from each model's test_predictions.txt.
upos_predictions_list = get_preds(upos_predictions)
xpos_predictions_list = get_preds(xpos_predictions)

In [9]:
# TODO: get the UPOS predictions to work; only the XPOS lists are checked here.
# Sanity check: the reference and predicted XPOS tag lists must have equal
# length, otherwise the element-wise metrics below are meaningless.
len(xpos_standard_list) == len(xpos_predictions_list)

True

In [10]:
# Preview the first ten reference tags and the first ten predicted tags,
# one list per output line.
print(xpos_standard_list[:10], xpos_predictions_list[:10], sep='\n')

['subst:sg:nom:m1', 'adj:sg:nom:m1:pos', 'subst:sg:nom:m1', 'adj:sg:nom:m1:pos', 'subst:sg:gen:m3', 'adj:sg:gen:m3', 'prep:loc:wok', 'subst:sg:loc:f', 'subst:sg:nom:n:ncol', 'subst:sg:loc:f']
['subst:sg:nom:m1', 'adj:sg:nom:m1:pos', 'subst:sg:nom:m1', 'ppas:sg:nom:m1:perf:aff', 'subst:sg:gen:m3', 'adj:sg:gen:m3:pos', 'prep:loc:wok', 'subst:sg:loc:f', 'subst:sg:nom:n:ncol', 'subst:sg:gen:f']


In [11]:
# Missing reference tags arrive as None, while the label inventory and the
# predictions use the '_' placeholder; normalise None to '_' so both sides are
# comparable strings. (Presumably the CoNLL-U reader maps '_' to None -- confirm.)
for i, item in enumerate(xpos_standard_list):
    # Identity check is the correct way to test for the None singleton.
    if item is None:
        xpos_standard_list[i] = '_'

In [12]:
# Fraction of positions at which the predicted XPOS tag equals the reference tag.
sklearn.metrics.accuracy_score(xpos_standard_list, xpos_predictions_list)

0.8484756097560976

In [13]:
# Print the project's evaluation report for XPOS (overall and per-class
# measures), passing the XPOS label inventory as `labels`.
# NOTE(review): many of the 0.00% rows are probably labels that never occur in
# this test set (sklearn emits _warn_prf warnings for them) -- confirm before
# reading them as model errors.
get_measures(xpos_standard_list, xpos_predictions_list, labels=labels_xpos_list)

MEASURES:
Accuracy: 84.85%
Matthew's Correlation Coefficient: 84.37%

MEASURES PER CLASS:
Precision:
	_: 90.00%
	adj:pl:acc:f:com: 0.00%
	adj:pl:acc:f:pos: 0.00%
	adj:pl:acc:f:sup: 0.00%
	adj:pl:acc:m1:com: 0.00%
	adj:pl:acc:m1:pos: 0.00%
	adj:pl:acc:m1:sup: 0.00%
	adj:pl:acc:m2:pos: 0.00%
	adj:pl:acc:m2:sup: 0.00%
	adj:pl:acc:m3:com: 0.00%
	adj:pl:acc:m3:pos: 75.00%
	adj:pl:acc:m3:sup: 0.00%
	adj:pl:acc:n:com: 0.00%
	adj:pl:acc:n:pos: 0.00%
	adj:pl:acc:n:sup: 0.00%
	adj:pl:dat:f:com: 0.00%
	adj:pl:dat:f:pos: 0.00%
	adj:pl:dat:m1:com: 0.00%
	adj:pl:dat:m1:pos: 0.00%
	adj:pl:dat:m1:sup: 0.00%
	adj:pl:dat:m2:pos: 0.00%
	adj:pl:dat:m3:com: 0.00%
	adj:pl:dat:m3:pos: 0.00%
	adj:pl:dat:n:pos: 0.00%
	adj:pl:dat:n:sup: 0.00%
	adj:pl:gen:f:com: 0.00%
	adj:pl:gen:f:pos: 100.00%
	adj:pl:gen:f:sup: 0.00%
	adj:pl:gen:m1:com: 0.00%
	adj:pl:gen:m1:pos: 37.50%
	adj:pl:gen:m1:sup: 0.00%
	adj:pl:gen:m2:pos: 100.00%
	adj:pl:gen:m2:sup: 0.00%
	adj:pl:gen:m3:com: 0.00%
	adj:pl:gen:m3:pos: 50.00%
	adj:pl:ge

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
