# ERROR EXTRACTION AND COMPARISON

### IMPORTS, VARIABLES

In [1]:
import pandas as pd

# no row limit: displayed DataFrames are rendered in full instead of being truncated
pd.options.display.max_rows = None

In [2]:
# Each spreadsheet is read with its first column used as the DataFrame index (index_col=0).
# The files are grouped by annotation layer and unpacked in the order they are listed.

# lemmatization results
morfeusz_lemmas, stanza_lemmas = (
    pd.read_excel(path, index_col=0)
    for path in (
        '../data/results/Morfeusz_lemmas.xlsx',
        '../data/results/stanza_lemmas.xlsx',
    )
)

# UPOS results
bert_upos, marmot_upos, stanza_upos, ud_upos = (
    pd.read_excel(path, index_col=0)
    for path in (
        '../data/results/bert_UPOS.xlsx',
        '../data/results/marmot_UPOS.xlsx',
        '../data/results/stanza_upos.xlsx',
        '../data/results/UD_upos.xlsx',
    )
)

# XPOS results
bert_xpos, marmot_xpos, morfeusz_xpos, stanza_xpos = (
    pd.read_excel(path, index_col=0)
    for path in (
        '../data/results/bert_XPOS.xlsx',
        '../data/results/marmot_XPOS.xlsx',
        '../data/results/Morfeusz_XPOS.xlsx',
        '../data/results/stanza_xpos.xlsx',
    )
)

In [3]:
# Every tagger name is written next to its results; zip() then splits the pairs into
# a list of labels and a list of DataFrames that share the same order.

# lemmatization
lemma_labels, lemmas = map(list, zip(
    ('Morfeusz', morfeusz_lemmas),
    ('Stanza', stanza_lemmas),
))

# UPOS
upos_labels, upos = map(list, zip(
    ('BERT', bert_upos),
    ('Marmot', marmot_upos),
    ('Stanza', stanza_upos),
    ('UD', ud_upos),
))

# XPOS
xpos_labels, xpos = map(list, zip(
    ('BERT', bert_xpos),
    ('Marmot', marmot_xpos),
    ('Morfeusz', morfeusz_xpos),
    ('Stanza', stanza_xpos),
))

### FUNCTIONS AND CLASSES

In [4]:
def compare_results(results: list, sources: list):
    '''Collect the rows on which at least two taggers disagree with the Gold Standard.

    The per-tagger DataFrames are put side by side in one master DataFrame (one
    "<source> Prediction" column per tagger) and only the rows where two or more of
    those predictions differ from the 'Gold Standard' column are kept. Any number of
    taggers >= 2 is supported. The function prints the number of such rows and
    displays the resulting DataFrame.

    Args:
        results (list[DataFrame]): Tagger outputs, all of the same length. Each one needs
            a 'Prediction' column; the 'Token', 'Context' and 'Gold Standard' columns are
            taken from the first DataFrame and dropped from the others. An optional
            'Confidence' column is discarded.
        sources (list[str]): Names of the taggers, in the same order as results.

    Returns:
        A Pandas DataFrame containing the rows with at least two wrong predictions, or
        None (after printing the reason) if one of the sanity checks fails.
    '''
    import builtins

    # sanity checks
    # finish if all the DFs are not of the same length
    if len({len(x) for x in results}) != 1:
        print('The inputs are of unequal length, cannot conduct a comparison!')
        return
    # finish if the DF list and the label list are not of the same length
    if len(sources) != len(results):
        print('There is a mismatch between the input and labels, cannot conduct a comparison!')
        return
    # finish if there are fewer than 2 taggers, "at least two wrong" would be impossible
    if len(sources) < 2:
        print('This function can only conduct comparisons between 2 or more DataFrames!')
        return

    prediction_columns = [f'{source} Prediction' for source in sources]

    # preparing the pieces of the master DataFrame
    frames = []
    for i, (result, column) in enumerate(zip(results, prediction_columns)):
        # renaming the Prediction column to include the tagger name
        # (rename/drop return new objects, the input DataFrames are left untouched)
        result = result.rename(columns={'Prediction': column})
        # dropping confidence as it is not as relevant to the comparison
        result = result.drop(columns=['Confidence'], errors='ignore')
        # the first DF supplies the shared columns, the others only their predictions
        if i > 0:
            result = result.drop(columns=['Token', 'Context', 'Gold Standard'])
        frames.append(result)

    # a single concatenation; join='inner' keeps only index labels present in every DF
    full_results = pd.concat(frames, axis=1, join='inner')

    # counting, for every row, how many predictions differ from the Gold Standard
    gold = full_results['Gold Standard']
    wrong_counts = sum(
        (full_results[column] != gold).astype(int) for column in prediction_columns
    )
    # keeping the rows where at least 2 taggers are wrong; for 2 taggers this means both,
    # for 4 taggers it is equivalent to checking all 6 pairs of taggers
    full_results = full_results[wrong_counts >= 2]

    # return
    print(f'There are {len(full_results)} instances where at least two predictions do not match the Gold Standard.')
    # IPython/Jupyter provides `display` as a builtin; fall back to print outside of it
    show = getattr(builtins, 'display', print)
    show(full_results)
    return full_results
    

In [5]:
lemma_results = compare_results(lemmas, lemma_labels)
# compare_results returns None (after printing the reason) when a sanity check fails,
# so the export only runs when there is a DataFrame to write
if lemma_results is not None:
    lemma_results.to_excel('../data/results/all_lemmas.xlsx')

There are 148 instances where at least two predictions do not match the Gold Standard.


Unnamed: 0,Token,Context,Gold Standard,Morfeusz Prediction,Stanza Prediction
17,Dobrrzyńskiej,ziemi Dobrrzyńskiej (,dobrzyńska,Dobrrzyńskiej,dobrrzyńska
27,ś,bratem ś .,świętej,ś,być
29,p,. p .,pamięci,List_świętego_Piotra,pan
52,Pinińskich,z Pinińskich właścicieli,Piniński,Pinińskich,piniński
54,Dóbr,właścicieli Dóbr Strużewo,dobra,Dobra,dzbr
58,Puszczanki,"adlinencjami Puszczanki ,",Puszczanka,puszczanka,puszczanka
138,Floyrana,i Floyrana .,Floyran,Floyrana,floyrana
150,Muczynowską,z Muczynowską –,Muczynowska,Muczynowską,muczynowska
158,Rzotoławskim,za Rzotoławskim –,Rzotoławski,Rzotoławskim,rzotoławski
183,Niewiem,. Niewiem o,niewiedzieć,Niewiem,niewiem


In [6]:
upos_results = compare_results(upos, upos_labels)
# compare_results returns None (after printing the reason) when a sanity check fails,
# so the export only runs when there is a DataFrame to write
if upos_results is not None:
    upos_results.to_excel('../data/results/all_upos.xlsx')

There are 972 instances where at least two predictions do not match the Gold Standard.


Unnamed: 0,Token,Context,Gold Standard,BERT Prediction,Marmot Prediction,Stanza Prediction,UD Prediction
15,Komornika,Jana Komornika ziemi,NOUN,PROPN,PROPN,PROPN,PROPN
17,Dobrrzyńskiej,ziemi Dobrrzyńskiej (,ADJ,ADJ,ADJ,PROPN,PROPN
27,ś,bratem ś .,X,X,AUX,AUX,X
52,Pinińskich,z Pinińskich właścicieli,PROPN,PROPN,ADJ,ADJ,PROPN
54,Dóbr,właścicieli Dóbr Strużewo,NOUN,NOUN,PROPN,NOUN,PROPN
60,Dąbrowy,", Dąbrowy części",PROPN,PROPN,ADJ,ADJ,PROPN
77,śp,że śp Dziad,X,X,NOUN,PROPN,NOUN
78,Dziad,śp Dziad mój,NOUN,NOUN,NOUN,PROPN,PROPN
97,Dobrzyńskiej,ziemi Dobrzyńskiej za,PROPN,ADJ,ADJ,ADJ,PROPN
99,ośm,za ośm tysięcy,NUM,X,NUM,NUM,NOUN


In [7]:
xpos_results = compare_results(xpos, xpos_labels)
# compare_results returns None (after printing the reason) when a sanity check fails,
# so the export only runs when there is a DataFrame to write
if xpos_results is not None:
    xpos_results.to_excel('../data/results/all_xpos.xlsx')

There are 523 instances where at least two predictions do not match the Gold Standard.


Unnamed: 0,Token,Context,Gold Standard,BERT Prediction,Marmot Prediction,Morfeusz Prediction,Stanza Prediction
0,Dziad,Dziad mój,subst:sg:nom:m1,subst:sg:nom:m1,subst:sg:nom:m3,subst:sg:acc:f,subst:sg:nom:m1
1,mój,Dziad mój Melchior,adj:sg:nom:m1:pos,adj:sg:nom:m1:pos,adj:sg:nom:m3:pos,adj:sg:nom:m3:pos,adj:sg:nom:m1:pos
2,Melchior,mój Melchior urodzony,subst:sg:nom:m1,subst:sg:nom:m1,subst:sg:nom:m3,subst:sg:nom:m3,subst:sg:nom:m1
3,urodzony,Melchior urodzony roku,ppas:sg:nom:m1:perf:aff,ppas:sg:nom:m1:perf:aff,ppas:sg:nom:m3:perf:aff,adj:sg:nom:m3:pos,ppas:sg:nom:m1:perf:aff
8,Godziszewo,wsi Godziszewo parafii,subst:sg:nom:n:ncol,subst:sg:nom:n:ncol,subst:sg:acc:n:ncol,subst:sg:nom:n:ncol,subst:sg:nom:m1
9,parafii,Godziszewo parafii Rypnin,subst:sg:loc:f,subst:sg:gen:f,subst:sg:gen:f,subst:sg:gen:f,subst:sg:gen:f
10,Rypnin,parafii Rypnin syn,subst:sg:nom:m3,subst:sg:nom:m3,subst:sg:nom:m1,subst:sg:nom:m1,subst:sg:nom:m1
25,starszym,był starszym bratem,adj:sg:inst:m1:com,adj:sg:inst:m1:pos,adj:sg:inst:m3:pos,adj:sg:inst:m1:com,adj:sg:inst:m1:com
27,ś,bratem ś .,brev:pun,brev:pun,aglt:sg:sec:imperf:nwok,brev:pun,aglt:sg:sec:imperf:nwok
29,p,. p .,brev:npun,brev:pun,brev:pun,brev:npun,brev:pun
