# Notebook for quantitative evaluation of document-based and aspect-based sentiment analysis against labels manually assigned to articles.

This prepares confusion matrices.

### Imports

In [None]:
import numpy as np
import pandas as pd
import os
import ast

# Evaluation of Document Based Sentiment Analysis

### Read data

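You might need to adjust the directory and file names in the cell below. The quoted names listed here are model suffixes that can be assigned to `model_suffix`: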

"siebert-roberta"

"financial-bert"

"auditor_sentiment"

"twitter-roberta"

"financial-roberta"

In [None]:
# Evaluation mode. When truthy, the document-level branch below is used and
# eval_core shifts the predicted labels by -1; the commented-out alternative
# selects the aspect-based branch.
document_based = True  # False
# Model identifier: embedded in the results file name and reused as the file
# name of the saved confusion matrix.
# Other values that were used here: 'financial-bert', 'deberta', 'siebert-roberta'
model_suffix = 'financial-roberta'
# Directory with the model predictions; it is resolved relative to the parent
# directory ('..') when the CSV is loaded.
# Aspect-based alternative: 'aspect_based_sentiment_analysis\\test_predicitons_few_models'
results_directory_name = 'document_based_sentiment_analysis'
# CSV file with the model predictions.
# Aspect-based alternative: f'testset_extended_results_absa_{model_suffix}.csv'
results_file_name = f'full_dataseet_overall_sentiment_{model_suffix}.csv'
# Directory (relative to this notebook) with the Excel files filled in by annotators.
folder_with_labels = 'extended_test_labels'
# Directory (relative to this notebook) where the confusion matrix is saved as .npy.
confusion_matrix_folder = 'confusion_matrices'

In [None]:
# Load the model predictions from ../<results_directory_name>/<results_file_name>.
df_test_absa_model = pd.read_csv(os.path.join('..', results_directory_name,  results_file_name))

In [None]:
df_test_absa_model

The chunk of code below assigns a class to each article based on the model output.

In [None]:
# Build results_dict: article id (the 'Unnamed: 0' column of the predictions
# file) -> {name: predicted class}.
results_dict = {}
if document_based:
    # Kept for backward compatibility; not used in the visible part of this notebook.
    results_doc_model = {}
    # Document-level mode: a single entry per article under the key 'overall'.
    # The value is stored exactly as read from the file; eval_core subtracts 1
    # from it when document_based is set.
    for _, row in df_test_absa_model.iterrows():
        results_dict[row['Unnamed: 0']] = {'overall': int(row.overall_sentiment_name)}
else:
    # Aspect-based mode: one entry per keyword / named entity of the article.
    for _, row in df_test_absa_model.iterrows():
        # Both columns hold the repr of a sequence whose first element is a
        # dict: aspect -> sequence whose first element is a list of
        # {'label': ..., 'score': ...} candidates.
        keywords = ast.literal_eval(row.keywords_sentiment)[0]
        ners = ast.literal_eval(row.ner_sentiment)[0]

        # Named-entity entries win when an aspect appears in both dicts.
        aspects = keywords | ners

        results = {}
        for aspect, predictions in aspects.items():
            # Reset the running best for EVERY aspect. Previously these were
            # initialised once per row, so an aspect whose top score was lower
            # than an earlier aspect's top score silently inherited that
            # earlier aspect's label.
            label = None
            score = -1
            for candidate in predictions[0]:
                if candidate['score'] > score:
                    score = candidate['score']
                    label = candidate['label']

            # Map the winning label to a numeric class. Anything that is not
            # 'Negative' or 'Neutral' (including no candidates at all) maps to 1,
            # as before.
            if label == 'Negative':
                numeric_label = -1
            elif label == 'Neutral':
                numeric_label = 0
            else:
                numeric_label = 1
            results[aspect] = numeric_label

        results_dict[row['Unnamed: 0']] = results


In [None]:
results_dict

In [None]:
#results_dict

In [None]:
df_test_absa_model

The chunk of code below reads the data from the files filled in by annotators. These annotations are treated as ground truth, and the model predictions are evaluated against them.

In [None]:
# Read every file in the annotation folder and stack them into one DataFrame.
labels_dir = os.path.join('.', folder_with_labels)
annotated_frames = [
    pd.read_excel(os.path.join(labels_dir, file))
    # os.listdir returns names in arbitrary order; sorting makes the row order
    # (and therefore which duplicate article id wins later on) reproducible.
    for file in sorted(os.listdir(labels_dir))
]
# Concatenate once instead of growing the frame inside the loop (which copies
# all previous rows on every iteration). pd.concat raises on an empty list, so
# fall back to an empty frame when the folder has no files.
df_test_annotated = pd.concat(annotated_frames) if annotated_frames else pd.DataFrame()
print(df_test_annotated)

In [None]:
df_test_annotated

The code below parses the text from the files prepared by the labelers.

In [None]:
# Turn the annotators' rows into two lookups keyed by article id
# (the 'Unnamed: 0' column):
#   results_annotation          id -> {aspect: label string, or None if unlabeled}
#   results_annotation_overall  id -> {'overall': label string, or None if missing}
results_annotation = {}
results_annotation_overall = {}
for _, annotated_row in df_test_annotated.iterrows():
    # Both columns hold the repr of a list of 'aspect' or 'aspect:label' strings.
    annotated_aspects = (
        ast.literal_eval(annotated_row.keywords_lower)
        + ast.literal_eval(annotated_row.ner_list)
    )

    aspect_labels = {}
    for entry in annotated_aspects:
        parts = entry.split(':')
        # The label is the text between the first and the second colon;
        # entries without a colon carry no label.
        aspect_labels[parts[0]] = parts[1] if len(parts) > 1 else None

    article_id = annotated_row['Unnamed: 0']
    results_annotation[article_id] = aspect_labels

    # The overall label is stored as a string ('1', '0', '-1', ...) so that it
    # has the same type as the per-aspect labels parsed above.
    overall = annotated_row.overall
    results_annotation_overall[article_id] = {
        'overall': str(int(overall)) if pd.notnull(overall) else None
    }


In [None]:
results_annotation_overall

In [None]:
results_annotation

In [None]:
results_dict

In [None]:
def eval_class_1_vs_0(annotated, pred, tp, fp, fn, tn):
    """Update binary confusion counts for class 1 (positive) vs class 0 (negative).

    Args:
        annotated: Ground-truth label as a string; only '1' and '0' are counted.
        pred: Predicted label as a number; only 1 and 0 are counted.
        tp, fp, fn, tn: Running counts before this sample.

    Returns:
        The tuple (tp, fp, fn, tn) with at most one count incremented by one.
        Any other annotated/predicted combination leaves the counts unchanged.
    """
    if annotated == '1':
        if pred == 1:
            tp += 1
        elif pred == 0:
            fn += 1
    elif annotated == '0':
        if pred == 1:
            fp += 1
        elif pred == 0:
            tn += 1

    return tp, fp, fn, tn

def eval_class_1_vs_min1(annotated, pred, tp, fp, fn, tn):
    """Update binary confusion counts for class 1 (positive) vs class -1 (negative).

    Args:
        annotated: Ground-truth label as a string; only '1' and '-1' are counted.
        pred: Predicted label as a number; only 1 and -1 are counted.
        tp, fp, fn, tn: Running counts before this sample.

    Returns:
        The tuple (tp, fp, fn, tn) with at most one count incremented by one.
        Any other annotated/predicted combination leaves the counts unchanged.
    """
    if annotated == '1':
        if pred == 1:
            tp += 1
        elif pred == -1:
            fn += 1
    elif annotated == '-1':
        if pred == 1:
            fp += 1
        elif pred == -1:
            tn += 1

    return tp, fp, fn, tn

def eval_class_0_vs_min1(annotated, pred, tp, fp, fn, tn):
    """Update binary confusion counts for class 0 (positive) vs class -1 (negative).

    Args:
        annotated: Ground-truth label as a string; only '0' and '-1' are counted.
        pred: Predicted label as a number; only 0 and -1 are counted.
        tp, fp, fn, tn: Running counts before this sample.

    Returns:
        The tuple (tp, fp, fn, tn) with at most one count incremented by one.
        Any other annotated/predicted combination leaves the counts unchanged.
    """
    if annotated == '0':
        if pred == 0:
            tp += 1
        elif pred == -1:
            fn += 1
    elif annotated == '-1':
        if pred == 0:
            fp += 1
        elif pred == -1:
            tn += 1

    return tp, fp, fn, tn

In [None]:
def eval_core(results_dict, compare_with, type, document_based):
    """Count tp/fp/fn/tn over all predictions for one pair of classes.

    Args:
        results_dict: Mapping id -> {name: predicted numeric label}.
        compare_with: Mapping id -> {name: annotated label string or None};
            treated as the ground truth.
        type: Which class pair to evaluate: '1vs0', '1vsmin1' or '0vsmin1'.
            (The name shadows the builtin but is kept because it is part of
            the existing signature.)
        document_based: When truthy, 1 is subtracted from every predicted label
            before comparison.

    Returns:
        The tuple (tp, fp, fn, tn).

    Raises:
        ValueError: If `type` is not one of the supported class pairs.
            Previously an unknown value silently produced all-zero counts.
    """
    evaluators = {
        '1vs0': eval_class_1_vs_0,
        '1vsmin1': eval_class_1_vs_min1,
        '0vsmin1': eval_class_0_vs_min1,
    }
    if type not in evaluators:
        raise ValueError(
            f'Unknown evaluation type {type!r}; expected one of {sorted(evaluators)}'
        )
    update_counts = evaluators[type]

    tp = fp = fn = tn = 0

    for i, predictions in results_dict.items():
        for j, predicted_label in predictions.items():
            try:
                annotated_label = compare_with[i][j]
            except (LookupError, TypeError) as e:
                # No annotation available for this id/name: report and skip,
                # as before. Only lookup failures are swallowed so that
                # unrelated errors are no longer hidden.
                print(e)
                continue

            # Entries the annotators left unlabeled are not evaluated.
            if annotated_label is None:
                continue

            if document_based:
                predicted_label = predicted_label - 1

            tp, fp, fn, tn = update_counts(annotated_label, predicted_label, tp, fp, fn, tn)

    return tp, fp, fn, tn

In [None]:
# 3x3 confusion matrix with every cell initialised to NaN; the cells are
# filled in pairwise by the evaluation steps further down.
cm = np.full((3, 3), np.nan)

In [None]:
cm

In [None]:
# Ground truth used for the evaluation below: the annotators' overall
# (document-level) labels.
compare_with = results_annotation_overall

In [None]:
# Counts for the class pair 1 vs 0 (class 1 is treated as the positive class).
tp, fp, fn, tn = eval_core(results_dict, compare_with, '1vs0', document_based)
print(tp, fp, fn, tn)

In [None]:
# Store the 1-vs-0 counts computed just before this cell. Row = predicted
# class, column = annotated class; index 0 stands for class 1, index 1 for class 0.
cm[0, 0] = tp  # predicted 1, annotated 1
cm[0, 1] = fp  # predicted 1, annotated 0
cm[1, 0] = fn  # predicted 0, annotated 1
cm[1, 1] = tn  # predicted 0, annotated 0
print(cm)

In [None]:
# Counts for the class pair 1 vs -1 (class 1 is treated as the positive class).
tp, fp, fn, tn = eval_core(results_dict, compare_with, '1vsmin1', document_based)
print(tp, fp, fn, tn)

In [None]:
# Store the 1-vs-(-1) counts computed just before this cell. Row = predicted
# class, column = annotated class; index 0 stands for class 1, index 2 for class -1.
cm[0, 0] = tp  # predicted 1, annotated 1
cm[0, 2] = fp  # predicted 1, annotated -1
cm[2, 0] = fn  # predicted -1, annotated 1
cm[2, 2] = tn  # predicted -1, annotated -1
print(cm)

In [None]:
# Counts for the class pair 0 vs -1 (class 0 is treated as the positive class).
tp, fp, fn, tn = eval_core(results_dict, compare_with, '0vsmin1', document_based)
print(tp, fp, fn, tn)

In [None]:
# Store the 0-vs-(-1) counts computed just before this cell. Row = predicted
# class, column = annotated class; index 1 stands for class 0, index 2 for class -1.
cm[1, 1] = tp  # predicted 0, annotated 0
cm[1, 2] = fp  # predicted 0, annotated -1
cm[2, 1] = fn  # predicted -1, annotated 0
cm[2, 2] = tn  # predicted -1, annotated -1
print(cm)

In [None]:
# Create the output folder if it is missing; without this, open() fails with
# FileNotFoundError on a fresh checkout. `os` is imported at the top of the notebook.
os.makedirs(confusion_matrix_folder, exist_ok=True)
# Persist the confusion matrix as <confusion_matrix_folder>/<model_suffix>.npy.
with open(f'{confusion_matrix_folder}/{model_suffix}.npy', 'wb') as f:
    np.save(f, cm)

In [None]:
np.load(f'{confusion_matrix_folder}/{model_suffix}.npy')