# GATE CLOUD UD TAGGER EVALUATION

### IMPORTS, VARIABLES

In [1]:
import requests
import math
import sklearn.metrics
import pandas as pd
from tqdm import tqdm
import time

# Do not truncate DataFrames when they are displayed: show every row.
pd.set_option('display.max_rows', None)

In [2]:
# Tagger used: https://cloud.gate.ac.uk/shopfront/displayItem/tagger-pos-pl-maxent1 - described there as
# "A POS tagger for pl / Polish using the Universal Dependencies POS tagset."
# Instructions on how to use the API: https://cloud.gate.ac.uk/info/help/online-api.html

# Endpoint of the tagger; requests are POSTed to it by TaggerAnnotations.
URL = 'https://cloud-api.gate.ac.uk/process/tagger-pos-pl-maxent1'
# NOTE: despite the name, these are HTTP request headers (they are passed as `headers` to TaggerAnnotations):
# plain text is sent and a JSON response is requested.
PARAMS = {
    'Content-Type': 'text/plain',
    'Accept': 'application/json'
}

# The data is read from CoNLL-U files so that the loading code from an earlier project can be reused.
# `file` is the data evaluated in the "EXECUTION - PREDICTIONS" section, `test_file` is the UD Polish-PDB test set.
file = '../data/memoirs_10k_corrected.conllu'
test_file = '../data/ud-treebanks/UD_Polish-PDB/pl_pdb-ud-test.conllu'

In [3]:
from functions import *
from preproc_bert import remove_ranges

### FUNCTIONS AND CLASSES

In [4]:
class TaggerAnnotations:
    '''A class intended for retrieving, processing, and storing the annotations from an online tagger.

    Attributes:
        processed_annotations (list[list]): One entry per input token, in input order. Every entry is a list holding the
            token, the POS tag, the confidence thereof, a list of possible targets and a list of those confidences.
            Tokens for which no tagger output could be found hold the MISPARSED placeholder in all five positions.
        only_annotations (list[str]): A list containing only the POS tags (position 1 of every processed annotation).
        data (list[list[str]]): The original input data, one list of tokens per sentence.
    '''

    # Placeholder stored in every field of an annotation when an input token could not be matched.
    MISPARSED = 'MISPARSED'

    def __init__(self, url: str, headers: dict, data: list, batch_size: int = 50):
        '''The __init__ method of the class.
        Retrieves the tagger annotations batch by batch, aligns them with the input tokens and fills
        processed_annotations and only_annotations.

        Args:
            url (str): The URL to which the query should be made.
            headers (dict): The HTTP headers that the query requires.
            data (list[list[str]]): The original, unannotated data represented as a list of lists of strings.
            batch_size (int): The number of sentences to be sent together in one request.

        Raises:
            ValueError: If batch_size is smaller than 1.
        '''
        if batch_size < 1:
            raise ValueError('batch_size must be a positive integer')

        self.processed_annotations = []
        self.only_annotations = []
        self.data = data

        # The bar still counts sentences (as before), but it is advanced once per batch.
        with tqdm(total=len(data), desc='Loading sentences...') as progress:
            for start in range(0, len(data), batch_size):
                # Pause before every request; presumably needed to stay within the service's rate limit.
                time.sleep(3)

                # Slicing past the end of a list simply yields a shorter list, so no special case is
                # needed for the last batch.
                batch = data[start:start + batch_size]
                text = ' '.join(token for sentence in batch for token in sentence)

                response = requests.post(url=url, data=text.encode('utf-8'), headers=headers)
                annotations = response.json()

                try:
                    for entry in annotations['entities']['Token']:
                        self.__retrieve_anns(entry)
                except KeyError:
                    # The response does not have the expected shape (e.g. an error message from the
                    # service); show it and carry on with the next batch.
                    print(annotations)

                progress.update(len(batch))

        self.__sanity_check()

        self.only_annotations = [entry[1] for entry in self.processed_annotations]

    def print_annotations(self):
        '''A method of the class which prints out all words with their annotation and the confidence thereof.
        '''
        for entry in self.processed_annotations:
            print(entry[0] + ' | ' + entry[1] + ' | ' + str(entry[2]))

    def __retrieve_anns(self, entry: dict):
        '''A method of the class which reads one token entry returned by the tagger and appends the relevant
        elements to processed_annotations.

        Args:
            entry (dict): An entry returned by the tagger. It has to contain the keys 'string', 'upos',
                'LF_confidence', 'LF_target_list' and 'LF_confidence_list'.
        '''
        annotation = [
            entry['string'],
            entry['upos'],
            entry['LF_confidence'],
            entry['LF_target_list'],
            entry['LF_confidence_list'],
        ]
        self.processed_annotations.append(annotation)

    @staticmethod
    def _align_annotations(tokens, annotations):
        '''Aligns tagger annotations with the original tokens.

        The tagger may tokenize the text differently than the annotator did, so the two sequences can differ in
        length. For every original token the first not yet consumed annotation with an identical surface form is
        taken; annotations skipped on the way are dropped. A token without any such annotation receives a
        placeholder entry.

        NOTE(review): the search is not limited to a window, so a mistokenized word that reappears much later in
        the text is matched to that later occurrence - confirm whether a bounded look-ahead is wanted.

        Args:
            tokens: An iterable of the original tokens (strings).
            annotations (list[list]): The annotations as stored in processed_annotations, in tagger order.

        Returns:
            A list with exactly one annotation (or placeholder) per token, in token order.
        '''
        aligned = []
        next_unused = 0  # index of the first annotation that has not been consumed yet
        total = len(annotations)

        for token in tokens:
            match = None
            for k in range(next_unused, total):
                if annotations[k][0] == token:
                    match = annotations[k]
                    # Continue after the consumed annotation so that a repeated word form cannot be
                    # assigned the same annotation twice.
                    next_unused = k + 1
                    break

            if match is None:  # no identical form left - the token was tokenized differently
                match = [TaggerAnnotations.MISPARSED] * 5

            aligned.append(match)

        return aligned

    def __sanity_check(self):
        '''A method of the class that fixes length mismatches between the input and the output which arise when
        the tagger tokenizes the text differently than the annotator. After the call processed_annotations holds
        exactly one entry per input token; unmatched tokens are marked as MISPARSED and their number is reported.
        '''
        tokens = [token for sentence in self.data for token in sentence]

        self.processed_annotations = self._align_annotations(
            tqdm(tokens, desc='Checking annotation...'),
            self.processed_annotations,
        )

        misparsed = sum(1 for entry in self.processed_annotations if entry[1] == self.MISPARSED)
        if misparsed == 0:
            print('Loading annotations successful!')
        else:
            print('Annotations could not be matched to the original tokens.')
            print('{} of {} tokens were marked as MISPARSED.'.format(misparsed, len(tokens)))

    def __len__(self):
        '''The __len__ magic method of the class.

        Returns:
            The length of self.only_annotations, which should be identical to the length of self.processed_annotations.
        '''
        return len(self.only_annotations)

    def __getitem__(self, index: int):
        '''The __getitem__ magic method of the class.

        Args:
            index (int): The index signifying the desired element.

        Returns:
            A string representing the combination of the original token and the tagger annotation.
        '''
        token, annotation = self.processed_annotations[index][:2]
        return '_'.join([token, annotation])

In [5]:
def chunk_requests(test_tokens, chunk_size=250):
    '''A function that splits a larger request into smaller chunks to accommodate the limitations of the tagger.
    Every chunk is annotated by its own TaggerAnnotations instance (using the module-level URL and PARAMS).

    Args:
        test_tokens: A list of lists of tokens to be annotated, with sublists corresponding to sentences.
        chunk_size: The number of sentences handed to one TaggerAnnotations instance (250 by default).

    Returns:
        A list of strings representing the annotations assigned to the given tokens, in input order.
    '''
    all_test_annotations = []
    for start in range(0, len(test_tokens), chunk_size):
        # A slice that runs past the end is simply shorter, so the last chunk needs no special handling.
        test_tokens_chunk = test_tokens[start:start + chunk_size]

        test_tagger_anns = TaggerAnnotations(URL, PARAMS, test_tokens_chunk)
        all_test_annotations.extend(test_tagger_anns.only_annotations)

    return all_test_annotations

### EXECUTION - STANDARD

In [6]:
# Load the test set from `test_file`, requesting the 'upos' column, and split the result into tokens and
# gold-standard tags. Both helpers are defined outside this notebook (presumably in `functions`) - check
# there for the exact return shapes.
test_tokens_features = extract_conllu_data(test_file, 'upos', sentences=True, combined=True, fulltext=False)
test_tokens, test_features = make_tagger_friendly(test_tokens_features)

In [7]:
# Due to limitations of the tagger, it is impossible to send it more than about 30k tokens from one notebook -
# even across separate instances of the class object. The test sentences are therefore split at index 1100
# into two parts, which are tagged in separate runs (see the two cells below).
test_tokens1 = test_tokens[:1100]
test_tokens2 = test_tokens[1100:]

In [8]:
# Tag the first part of the test set; chunk_requests processes it chunk by chunk.
all_test_annotations1 = chunk_requests(test_tokens1)

Loading sentences...: 100%|███████████████████████████████████████████████████████████| 250/250 [00:31<00:00,  8.06it/s]
Checking annotation...: 100%|█████████████████████████████████████████████████████| 2903/2903 [00:00<00:00, 8286.22it/s]


Loading annotations successful!


Loading sentences...: 100%|███████████████████████████████████████████████████████████| 250/250 [00:18<00:00, 13.63it/s]
Checking annotation...: 100%|████████████████████████████████████████████████████| 1775/1775 [00:00<00:00, 11070.75it/s]


Loading annotations successful!


Loading sentences...: 100%|███████████████████████████████████████████████████████████| 250/250 [00:21<00:00, 11.90it/s]
Checking annotation...: 100%|█████████████████████████████████████████████████████| 2577/2577 [00:00<00:00, 9207.27it/s]


Loading annotations successful!


Loading sentences...: 100%|███████████████████████████████████████████████████████████| 250/250 [00:27<00:00,  9.11it/s]
Checking annotation...: 100%|█████████████████████████████████████████████████████| 4323/4323 [00:00<00:00, 4906.17it/s]


Loading annotations successful!


Loading sentences...: 100%|███████████████████████████████████████████████████████████| 100/100 [00:11<00:00,  8.94it/s]
Checking annotation...: 100%|████████████████████████████████████████████████████| 1767/1767 [00:00<00:00, 12372.72it/s]


Loading annotations successful!


In [9]:
# Tag the second part of the test set. Due to the tagger limitations, this cell had to be run only after the
# daily quota had been reset.
all_test_annotations2 = chunk_requests(test_tokens2)

Loading sentences...: 100%|███████████████████████████████████████████████████████████| 250/250 [00:28<00:00,  8.77it/s]
Checking annotation...: 100%|█████████████████████████████████████████████████████| 4382/4382 [00:00<00:00, 4917.59it/s]


Loading annotations successful!


Loading sentences...: 100%|███████████████████████████████████████████████████████████| 250/250 [00:28<00:00,  8.65it/s]
Checking annotation...: 100%|█████████████████████████████████████████████████████| 4464/4464 [00:00<00:00, 4634.63it/s]


Loading annotations successful!


Loading sentences...: 100%|███████████████████████████████████████████████████████████| 250/250 [00:37<00:00,  6.65it/s]
Checking annotation...: 100%|█████████████████████████████████████████████████████| 5779/5779 [00:01<00:00, 3780.37it/s]


Loading annotations successful!


Loading sentences...: 100%|███████████████████████████████████████████████████████████| 250/250 [00:26<00:00,  9.54it/s]
Checking annotation...: 100%|█████████████████████████████████████████████████████| 3951/3951 [00:00<00:00, 5452.00it/s]


Loading annotations successful!


Loading sentences...: 100%|███████████████████████████████████████████████████████████| 115/115 [00:13<00:00,  8.47it/s]
Checking annotation...: 100%|████████████████████████████████████████████████████| 1696/1696 [00:00<00:00, 13314.60it/s]

Loading annotations successful!





In [10]:
# Join the tags of both runs, restoring the order of the full test set.
all_test_annotations = all_test_annotations1 + all_test_annotations2

In [11]:
# Flatten the gold-standard tags into one list and compare them with the tagger output. get_measures is
# defined outside this notebook; it printed the report shown below.
get_measures([x for sentence in test_features for x in sentence], all_test_annotations, details=True)

MEASURES:
Accuracy: 90.98%
Precision (weighted): 91.17%
Recall (weighted): 90.98%
F1 (weighted): 90.95%
Matthew's Correlation Coefficient: 89.59%

MEASURES PER CLASS:
Precision:
	ADJ: 83.86%
	ADP: 96.65%
	ADV: 79.69%
	AUX: 87.67%
	CCONJ: 93.72%
	DET: 94.64%
	INTJ: 0.00%
	NOUN: 90.82%
	NUM: 73.62%
	PART: 89.62%
	PRON: 94.35%
	PROPN: 83.40%
	PUNCT: 99.93%
	SCONJ: 88.62%
	SYM: 0.00%
	VERB: 89.46%
	X: 52.24%
Recall:
	ADJ: 91.58%
	ADP: 98.89%
	ADV: 75.89%
	AUX: 82.98%
	CCONJ: 87.14%
	DET: 72.96%
	INTJ: 0.00%
	NOUN: 92.91%
	NUM: 70.57%
	PART: 80.69%
	PRON: 94.06%
	PROPN: 92.29%
	PUNCT: 99.72%
	SCONJ: 63.56%
	SYM: 0.00%
	VERB: 88.86%
	X: 41.42%



### EXECUTION - PREDICTIONS

In [12]:
# Load the data from `file` in the same way as the test set above: request the 'upos' column, then split
# the result into tokens and gold-standard tags.
tokens_features = extract_conllu_data(file, 'upos', sentences=True, combined=True, fulltext=False)
tokens, features = make_tagger_friendly(tokens_features)

In [13]:
# Tag all sentences with a single TaggerAnnotations instance, sending 20 sentences per request.
tagger_anns = TaggerAnnotations(URL, PARAMS, tokens, batch_size=20)

Loading sentences...: 100%|███████████████████████████████████████████████████████████| 360/360 [01:18<00:00,  4.58it/s]
Checking annotation...: 100%|███████████████████████████████████████████████████| 10286/10286 [00:05<00:00, 1715.33it/s]


Loading annotations successful!


In [14]:
# Compare the gold-standard tags with the tagger output; the report is printed below.
# NOTE(review): unlike in the test-set evaluation above, `features` is passed without being flattened -
# confirm that it is already a flat list here (or that get_measures handles nested input).
get_measures(features, tagger_anns.only_annotations, details=True)

MEASURES:
Accuracy: 83.41%
Precision (weighted): 84.12%
Recall (weighted): 83.41%
F1 (weighted): 83.06%
Matthew's Correlation Coefficient: 81.17%

MEASURES PER CLASS:
Precision:
	ADJ: 66.73%
	ADP: 96.18%
	ADV: 75.09%
	AUX: 84.08%
	CCONJ: 95.58%
	DET: 94.58%
	NOUN: 80.08%
	NUM: 77.00%
	PART: 83.77%
	PRON: 85.54%
	PROPN: 69.77%
	PUNCT: 100.00%
	SCONJ: 83.81%
	VERB: 81.64%
	X: 56.60%
Recall:
	ADJ: 75.08%
	ADP: 97.64%
	ADV: 64.20%
	AUX: 78.33%
	CCONJ: 95.58%
	DET: 44.55%
	NOUN: 82.56%
	NUM: 57.89%
	PART: 62.62%
	PRON: 81.98%
	PROPN: 92.60%
	PUNCT: 99.92%
	SCONJ: 44.22%
	VERB: 86.35%
	X: 45.45%



In [15]:
# Build a table from the gold tags, the predicted tags, the tokens and the tagger's confidence (position 2 of
# every processed annotation) and save it as an Excel file. get_full_table is defined outside this notebook.
full_table = get_full_table(
    features, 
    tagger_anns.only_annotations, 
    tokens, 
    confidence=[x[2] for x in tagger_anns.processed_annotations]
)
full_table.to_excel('../data/results/UD_upos.xlsx')

In [16]:
# Same inputs as for the full table, but passed to get_comparison (defined outside this notebook); judging by
# the output file name and the rows displayed below, the result lists the tokens whose prediction differs from
# the gold standard. It is saved as an Excel file.
comparison = get_comparison(
    features, 
    tagger_anns.only_annotations, 
    tokens, 
    confidence=[x[2] for x in tagger_anns.processed_annotations])
    
comparison.to_excel('../data/mistakes/UD_upos_mistakes.xlsx')

In [17]:
# Display the comparison table in the notebook (the first rows are shown below).
comparison

Unnamed: 0,Token,Context,Gold Standard,Prediction,Confidence
0,syn,Rypnin syn komornika,NOUN,ADP,0.446107
1,Komornika,Jana Komornika ziemi,NOUN,PROPN,0.967253
2,ziemi,Komornika ziemi Dobrrzyńskiej,NOUN,VERB,0.494722
3,Dobrrzyńskiej,ziemi Dobrrzyńskiej (,ADJ,PROPN,0.601537
4,właścicieli,Pinińskich właścicieli Dóbr,NOUN,VERB,0.440838
5,Dóbr,właścicieli Dóbr Strużewo,NOUN,PROPN,0.997932
6,śp,że śp Dziad,X,NOUN,0.792097
7,Dziad,śp Dziad mój,NOUN,PROPN,0.780149
8,mój,Dziad mój był,DET,VERB,0.513002
9,najmłodszy,był najmłodszy i,ADJ,NOUN,0.540951
