In [None]:
import pandas as pd
import numpy as np

### Result Evaluation

Reading the input file

In [None]:
# Load the gold-standard test sentences. Later cells read a "tag" entry from
# every non-null cell of this frame, so each cell is expected to be a
# dict-like token record (presumably one row per sentence -- confirm against
# the JSON file).
input_dataframe = pd.read_json("input/intersentential.json")
display(input_dataframe)

Reading the output files

In [None]:
# Load the CSV written by each tagger configuration. Naming follows the file
# names: ff = Flair-FSPOST, sf = Spacy-FSPOST, combi1/combi2 = combination 1/2.
ff_combi1_output = pd.read_csv("output/intersentential/Flair-FSPOST-Combination-1-intersentential.csv")
ff_combi2_output = pd.read_csv("output/intersentential/Flair-FSPOST-Combination-2-intersentential.csv")
sf_combi1_output = pd.read_csv("output/intersentential/Spacy-FSPOST-Combination-1-intersentential.csv")
sf_combi2_output = pd.read_csv("output/intersentential/Spacy-FSPOST-Combination-2-intersentential.csv")

Functions to be used for results evaluation

In [None]:
def fixed_columns(output):
    """Parse the stringified list columns of a tagger output frame.

    The columns 'general_tags', 'specific_tags' and 'token_tagset' arrive from
    the CSV as the textual repr of Python literals; they are converted back
    into real Python objects in place.

    Parameters
    ----------
    output : pandas.DataFrame
        Tagger output containing the three columns named above. The frame is
        modified in place.

    Returns
    -------
    pandas.DataFrame
        The same frame object, returned for convenient re-assignment.

    Raises
    ------
    ValueError, SyntaxError
        If a string cell is not a valid Python literal.
    """
    # Imported locally so the function does not depend on the import cell.
    import ast

    def _parse(cell):
        # ast.literal_eval only accepts literals, so text coming from the CSV
        # can never execute code (unlike eval). Cells that are not strings are
        # passed through unchanged, which makes re-running the cell on an
        # already-parsed frame a no-op instead of a TypeError.
        return ast.literal_eval(cell) if isinstance(cell, str) else cell

    for column in ('general_tags', 'specific_tags', 'token_tagset'):
        output[column] = output[column].apply(_parse)

    return output

In [None]:
def to_1D(series):
    """Flatten an iterable of iterables into a single pandas Series.

    Every element of every inner iterable becomes one entry of the result,
    in iteration order.
    """
    flattened = []
    for inner in series:
        flattened.extend(inner)
    return pd.Series(flattened)

In [None]:
# Parse the stringified list columns of every tagger output, in the same
# order as the frames were loaded.
ff_combi1_output, ff_combi2_output, sf_combi1_output, sf_combi2_output = [
    fixed_columns(frame)
    for frame in (ff_combi1_output, ff_combi2_output, sf_combi1_output, sf_combi2_output)
]

In [None]:
def _general_tag_summary(output):
    """Return (per-tag counts, total number of tags) for output['general_tags']."""
    counts = to_1D(output['general_tags']).value_counts()
    return counts, counts.sum()


# One (counts, total) pair per tagger configuration.
ff_combi1_tag_counts, ff_combi1_total = _general_tag_summary(ff_combi1_output)
ff_combi2_tag_counts, ff_combi2_total = _general_tag_summary(ff_combi2_output)
sf_combi1_tag_counts, sf_combi1_total = _general_tag_summary(sf_combi1_output)
sf_combi2_tag_counts, sf_combi2_total = _general_tag_summary(sf_combi2_output)

In [None]:
def print_tag_counts(tag_counts, total, tagger):
    """Print the per-tag counts followed by a one-line total for `tagger`.

    Parameters
    ----------
    tag_counts : object
        Printed as-is (the notebook passes a value_counts() Series).
    total : object
        Total number of tags, printed after the label.
    tagger : object
        Name shown in front of the total.
    """
    summary_parts = (tagger, " total tokens: ", total, "\n")
    print(tag_counts)
    # The trailing "\n" part yields a blank line after the summary.
    print(*summary_parts)

In [None]:
# Report the tag distribution of every tagger configuration.
for _counts, _total, _tagger in (
    (ff_combi1_tag_counts, ff_combi1_total, "Flair-FSPOST Combi1"),
    (ff_combi2_tag_counts, ff_combi2_total, "Flair-FSPOST Combi2"),
    (sf_combi1_tag_counts, sf_combi1_total, "Spacy-FSPOST Combi1"),
    (sf_combi2_tag_counts, sf_combi2_total, "Spacy-FSPOST Combi2"),
):
    print_tag_counts(_counts, _total, _tagger)

Overall Accuracy

In [None]:
# Gold-standard tags: one NumPy array of tag strings per row of
# input_dataframe, in row order.
tags_list = []

for _, sentence in input_dataframe.iterrows():
    # Only non-null cells hold token records (the original loop bounded
    # itself with .count(), i.e. the number of non-null cells; presumably
    # shorter sentences are null-padded -- confirm against the JSON).
    # Iterating the cell values avoids integer Series[...] lookups, which are
    # label-based or positional depending on the column labels.
    sentence_tags = [token["tag"] for token in sentence.dropna()]
    tags_list.append(np.array(sentence_tags))

In [None]:
# Distribution of the gold-standard tags across the whole test set, for
# comparison with the per-tagger distributions printed earlier.
test_data_counts = to_1D(tags_list).value_counts()
test_data_counts_total = test_data_counts.sum()

print_tag_counts(test_data_counts, test_data_counts_total, "Test data counts")

In [None]:
def get_overall_accuracy(output):
    """Compute the per-sentence tagging accuracy of one tagger output.

    For every sentence in the module-level `tags_list`, the gold tags are
    compared position by position with output['general_tags'] of the row
    with the same index label.

    NOTE(review): the comparison is an exact string match. Unlike
    get_accuracy_per_tags and generate_confusion_matrix, predicted
    'VBPT'/'VBPR'/'VBFT' are NOT treated as 'VB' here -- confirm that this
    difference is intended.

    Parameters
    ----------
    output : pandas.DataFrame
        Tagger output whose 'general_tags' column holds one tag sequence per
        sentence.

    Returns
    -------
    list of float
        Fraction (0..1) of correctly tagged tokens for each non-empty
        sentence, in sentence order. Sentences without gold tags are skipped
        because their accuracy is undefined (they previously caused a
        ZeroDivisionError).

    Raises
    ------
    IndexError
        If a predicted sequence is shorter than its gold sequence.
    """
    # Look the column up once instead of once per token.
    predicted_column = output['general_tags']

    accuracy = []
    for i, gold_tags in enumerate(tags_list):
        token_count = len(gold_tags)
        if token_count == 0:
            continue

        predicted_tags = predicted_column[i]
        matches = 0
        for j in range(token_count):
            if gold_tags[j] == predicted_tags[j]:
                matches += 1

        accuracy.append(matches / token_count)

    return accuracy

In [None]:
def print_overall_accuracy(output, tagger):
    """Print the mean of the per-sentence accuracies of `output`.

    The value printed is the average of the list returned by
    get_overall_accuracy(output), labelled with `tagger`.
    """
    per_sentence = get_overall_accuracy(output)
    mean_accuracy = sum(per_sentence) / len(per_sentence)
    message = ' accuracy: %f' % mean_accuracy
    print(tagger, message)

In [None]:
# Overall accuracy of every tagger configuration (combination 1 first, then
# combination 2).
for _frame, _tagger in (
    (ff_combi1_output, "Flair-FSPOST Combi1"),
    (sf_combi1_output, "Spacy-FSPOST Combi1"),
    (ff_combi2_output, "Flair-FSPOST Combi2"),
    (sf_combi2_output, "Spacy-FSPOST Combi2"),
):
    print_overall_accuracy(_frame, _tagger)

Taking the Accuracy per POS tag

In [None]:
# Gold tags for which print_accuracy_per_tags reports an accuracy; tags that
# never occur in the gold data are skipped when printing.
possible_tags = ['NOUN', 'PROPN', 'PR', 'DT', 'LM', 'CONJ', 'CCP', 'IN', 'VB', 'JJ', 'CD', 'RB', 'UH',
                  'TS', 'FW', 'PUNC', 'SYM', 'EX', 'TO', 'ADD', 'POS', 'PDT', 'XX', 'MD', 'AFX']

In [None]:
def get_accuracy_per_tags(output, tag):
    """Accuracy, in percent, over the tokens whose gold tag equals `tag`.

    Gold tags come from the module-level `tags_list`; predictions come from
    output['general_tags']. For gold 'VB' a prediction of 'VB', 'VBPT',
    'VBPR' or 'VBFT' counts as correct; for every other tag an exact match
    is required.

    Returns
    -------
    float or None
        Percentage (0..100) of correct predictions, or None when `tag` never
        occurs in the gold data.
    """
    verb_family = ('VB', 'VBPT', 'VBPR', 'VBFT')
    hits = 0
    seen = 0

    for row, gold_row in enumerate(tags_list):
        for col, gold in enumerate(gold_row):
            if gold != tag:
                continue

            seen += 1
            predicted = output['general_tags'][row][col]
            if gold == 'VB':
                if predicted in verb_family:
                    hits += 1
            elif gold == predicted:
                hits += 1

    if seen == 0:
        return None
    return hits / seen * 100

In [None]:
def print_accuracy_per_tags(output, tagger):
    """Print the per-tag accuracy of `output` for every tag in `possible_tags`.

    Tags for which get_accuracy_per_tags returns None (no gold occurrence)
    are omitted. A blank separator is printed at the end.
    """
    print("Tagger: ", tagger)

    for pos_tag in possible_tags:
        score = get_accuracy_per_tags(output, pos_tag)
        if score is None:
            continue
        print('POS Tag: ', pos_tag, ' accuracy: %f' % score)

    print("\n")

In [None]:
# Per-tag accuracy of every tagger configuration (combination 1 first, then
# combination 2).
for _frame, _tagger in (
    (ff_combi1_output, "Flair-FSPOST Combi1"),
    (sf_combi1_output, "Spacy-FSPOST Combi1"),
    (ff_combi2_output, "Flair-FSPOST Combi2"),
    (sf_combi2_output, "Spacy-FSPOST Combi2"),
):
    print_accuracy_per_tags(_frame, _tagger)

Generating Confusion Matrix

In [None]:
from sklearn import metrics
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Flat gold / predicted tag lists for Flair-FSPOST combination 1, with the
# predicted verb variants collapsed to 'VB'.
# NOTE(review): generate_confusion_matrix rebuilds the same lists locally, so
# these module-level copies look unused in the cells shown here.
actual_tags = []
predicted_tags = []
for i, gold_row in enumerate(tags_list):
    for j, gold in enumerate(gold_row):
        actual_tags.append(gold)
        predicted = ff_combi1_output['general_tags'][i][j]
        predicted_tags.append(
            'VB' if predicted in ('VB', 'VBPT', 'VBPR', 'VBFT') else predicted
        )

In [None]:
# Labels (and their initial order) used for the confusion matrix. Exactly one
# definition must be active; the commented-out lists below are alternatives
# kept for other runs of this notebook.

# Full tag list:
# dataset_pos_tags = ['NOUN', 'PROPN', 'PR', 'DT', 'LM', 'CONJ', 'CCP', 'IN', 'VB', 'JJ', 'CD', 'RB', 'UH',
#                  'TS', 'FW', 'PUNC', 'SYM', 'EX', 'TO', 'POS', 'XX', 'MD']

# "VERB" variant:
# dataset_pos_tags = ['NOUN', 'CONJ', 'VB', 'PR', 'JJ', 'RB', 'PUNC', 'DT', 'CCP', 'PROPN', 'MD', 'UH', 'CD', 'TO', 'LM', 'IN']

# "Homographs" variant:
# dataset_pos_tags = ['NOUN', 'CONJ', 'VB', 'RB', 'PR', 'DT', 'PUNC', 'PROPN', 'JJ', 'CCP', 'CD', 'LM', 'SYM', 'IN', 'XX', 'UH', 'TO', 'MD', 'FW']

# "Intersentential" variant (active; matches the input/output files loaded above):
dataset_pos_tags = ['NOUN', 'CONJ', 'PUNC', 'PR', 'VB', 'RB', 'JJ', 'DT', 'PROPN', 'CD', 'IN', 'TO', 'LM', 'CCP', 'SYM', 'UH', 'MD']

In [None]:
def clean_labels(label):
    """Return str(label) with every "['" and "']" removed.

    Used to turn the repr of a one-element string array/list, e.g.
    "['NOUN']", into the bare label text.
    """
    text = str(label)
    for bracket in ("['", "']"):
        text = text.replace(bracket, '')
    return text

In [None]:
def generate_confusion_matrix(combi):
    """Plot a row-normalised confusion matrix and print a classification report.

    Gold tags are taken from the module-level `tags_list`, predictions from
    combi['general_tags']. Predicted 'VBPT', 'VBPR' and 'VBFT' are collapsed
    to 'VB' before comparison. The matrix is restricted to the labels in the
    module-level `dataset_pos_tags`, normalised per gold tag
    (normalize='true'), and its rows/columns are sorted by descending
    diagonal value so the best-recognised tags come first.

    Parameters
    ----------
    combi : pandas.DataFrame
        Tagger output whose 'general_tags' column holds one tag sequence per
        sentence.

    Returns
    -------
    None
        The heatmap is shown and the sklearn classification report printed.

    Notes
    -----
    The font size is changed through plt.rcParams and therefore stays in
    effect for later plots in the session.
    """
    verb_tags = ('VB', 'VBPT', 'VBPR', 'VBFT')
    predicted_column = combi['general_tags']

    actual_tags = []
    predicted_tags = []
    for i, gold_row in enumerate(tags_list):
        predicted_row = predicted_column[i]
        for j, gold in enumerate(gold_row):
            actual_tags.append(gold)
            predicted = predicted_row[j]
            predicted_tags.append('VB' if predicted in verb_tags else predicted)

    # Scope the "ignore invalid operations" setting to this function instead
    # of changing NumPy's global error state for the rest of the session.
    with np.errstate(invalid='ignore'):
        plt.figure(figsize=(60, 40))
        plt.rcParams.update({'font.size': 32})
        cm = metrics.confusion_matrix(actual_tags, predicted_tags, labels=dataset_pos_tags, normalize='true')
        cm_df = pd.DataFrame(cm, columns=dataset_pos_tags)
        order = np.argsort(-cm_df.to_numpy().diagonal())

        # Labels in the same order as the re-ordered matrix. Handing them to
        # heatmap directly keeps every tick labelled and aligned, rather than
        # overwriting whatever ticks seaborn chose after the fact.
        labels = [dataset_pos_tags[k] for k in order]

        fx = sns.heatmap(cm_df.iloc[order, order], annot=True, fmt=".2f", cmap=plt.cm.Blues,
                         xticklabels=labels, yticklabels=labels)
        fx.set_title('Confusion Matrix \n')
        fx.set_xlabel('\n Predicted Values\n')
        fx.set_ylabel('\n Actual Values\n')
        plt.show()

        print(metrics.classification_report(actual_tags, predicted_tags))

In [None]:
# Confusion matrix and classification report: Flair-FSPOST, combination 1.
generate_confusion_matrix(ff_combi1_output)

In [None]:
# Confusion matrix and classification report: Spacy-FSPOST, combination 1.
generate_confusion_matrix(sf_combi1_output)

In [None]:
# Confusion matrix and classification report: Flair-FSPOST, combination 2.
generate_confusion_matrix(ff_combi2_output)

In [None]:
# Confusion matrix and classification report: Spacy-FSPOST, combination 2.
generate_confusion_matrix(sf_combi2_output)