ref: https://www.davidsbatista.net/blog/2018/05/09/Named_Entity_Evaluation/

In [1]:
%%capture
# NOTE: `%%capture` is an IPython cell magic and must remain the first line of
# the cell; it captures the output produced by the rest of this cell.
# Run the ner_eval.py library file from its absolute location.
%run /home/crimex/CRIMEX/ner_eval.py

import sys
# Add ../src/ (relative to the notebook's working directory) to the module
# search path -- presumably so that project modules can be imported; confirm.
sys.path.append('../src/')

In [8]:
from ner_eval import Evaluator
import os
import pandas as pd
import ast
import sklearn.metrics
import numpy as np
from itertools import chain
from sklearn.metrics import matthews_corrcoef, confusion_matrix
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score

# Evaluate every prediction workbook in `dir_path` and write one text report
# per workbook into `<dir_path>/semeval_result/`.
#
# Each .xlsx is expected to have exactly two columns (predicted tags, gold
# tags); every cell holds the string form of a Python list of tags for one
# sequence.  Two kinds of scores are produced:
#   * entity-level scores from `ner_eval.Evaluator`
#   * token-level scores (classification report, MCC, accuracy, precision)
#     computed after merging the B-/I- prefixes into one label per entity.

# Imported here because the notebook's import cell does not bring
# precision_score into scope.
from sklearn.metrics import precision_score

dir_path = '/home/crimex/CRIMEX/predicted_result/'

ent_type_eval = ['Criminal', 'Victim', 'Action', 'Location', 'Datetime', 'Item',
                 'Rootcause', 'Trigger', 'worth', 'Enforcement']

# Collapses the B-/I- position prefix so token-level metrics are per entity type.
# Built once instead of on every loop iteration.
tag_map = {
    'B-Criminal': 'Criminal',
    'I-Criminal': 'Criminal',
    'B-Action': 'Action',
    'I-Action': 'Action',
    'B-Location': 'Location',
    'I-Location': 'Location',
    'B-Item': 'Item',
    'I-Item': 'Item',
    'B-Victim': 'Victim',
    'I-Victim': 'Victim',
    'B-worth': 'Worth',
    'I-worth': 'Worth',
    'B-Datetime': 'Datetime',
    'I-Datetime': 'Datetime',
    'B-Enforcement': 'Enforcement',
    'I-Enforcement': 'Enforcement',
    'B-Rootcause': 'Rootcause',
    'I-Rootcause': 'Rootcause',
    'B-Trigger': 'Trigger',
    'I-Trigger': 'Trigger',
    'O': 'O',
}

# Report rulers and section separator, kept byte-identical to earlier reports
# (the head and body rulers intentionally differ by one dash, as before).
_RULE_HEAD = "+--------------+-----------------------+\n"
_RULE_BODY = "+---------------+-----------------------+\n"
_SECTION_BREAK = '\n-----------------------------------------------------------------\n\n'


def _per_label_scores(metric, y_true, y_pred):
    """Score each label found in *y_true* one-vs-rest with *metric*.

    Args:
        metric: callable taking (binary_truth, binary_prediction) arrays.
        y_true: 1-D numpy array of gold labels.
        y_pred: 1-D numpy array of predicted labels, aligned with *y_true*.

    Returns:
        dict mapping each unique gold label to its score, in sorted label order.
    """
    return {
        label: metric(np.where(y_true == label, 1, 0),
                      np.where(y_pred == label, 1, 0))
        for label in np.unique(y_true)
    }


def _format_report(title, scores, overall):
    """Render a per-label score table followed by an 'Overall' row as text."""
    rows = [_RULE_HEAD, f"| {title} \n", _RULE_HEAD]
    rows.extend(f"| {label:<12} |  {score}\n" for label, score in scores.items())
    rows += [_RULE_BODY, f"| {'Overall':<12} |  {overall}\n", _RULE_BODY]
    return "".join(rows)


# Sorted so that files are processed (and logged) in a reproducible order.
for file_name in sorted(os.listdir(dir_path)):
    if not file_name.endswith('.xlsx'):
        continue

    file_path = os.path.join(dir_path, file_name)
    print(file_path)

    # read xlsx file
    df = pd.read_excel(file_path)
    df.columns = ['Predict', 'Real']

    # Each cell is the textual repr of a list of tags; parse it back safely.
    y_pred = [ast.literal_eval(cell) for cell in df['Predict']]
    y_test = [ast.literal_eval(cell) for cell in df['Real']]

    # entity-level eval ----------------------------------------------------
    evaluator = Evaluator(y_test, y_pred, ent_type_eval)
    results, results_agg = evaluator.evaluate()

    # token-level eval -----------------------------------------------------
    # Flatten all sequences, dropping positions where either side is padding.
    flatten_true_predictions = []
    flatten_true_labels = []
    for true_seq, pred_seq in zip(y_test, y_pred):
        for pos, true_tag in enumerate(true_seq):
            # Index (rather than zip) the prediction so a prediction shorter
            # than its gold sequence still fails loudly, as it did before.
            if true_tag != 'pad' and pred_seq[pos] != 'pad':
                flatten_true_predictions.append(pred_seq[pos])
                flatten_true_labels.append(true_tag)

    # Convert the original tags to merged tags (unknown tags raise KeyError).
    ny_true = np.array([tag_map[tag] for tag in flatten_true_labels])
    ny_pred = np.array([tag_map[tag] for tag in flatten_true_predictions])

    # MCC: multi-class coefficient overall, one-vs-rest per label.
    mcc_all = matthews_corrcoef(ny_true, ny_pred)
    mcc_report = _format_report(
        "MCC report",
        _per_label_scores(matthews_corrcoef, ny_true, ny_pred),
        mcc_all,
    )

    # ACC: one-vs-rest accuracy per label; overall is their unweighted mean.
    accuracy_all = _per_label_scores(accuracy_score, ny_true, ny_pred)
    acc_report = _format_report(
        "ACC report",
        accuracy_all,
        np.mean(list(accuracy_all.values())),
    )

    # Precision: one-vs-rest per label; overall is their unweighted mean.
    # (Previously the overall row mistakenly averaged the accuracy values.)
    # NOTE: the header spelling is kept as-is so regenerated reports remain
    # comparable with earlier ones.
    precision_all = _per_label_scores(precision_score, ny_true, ny_pred)
    pre_report = _format_report(
        "precison report",
        precision_all,
        np.mean(list(precision_all.values())),
    )

    report = classification_report(ny_true, ny_pred, labels=np.unique(ny_true))

    # save result eval -----------------------------------------------------
    out_dir = os.path.join(dir_path, 'semeval_result')
    os.makedirs(out_dir, exist_ok=True)  # first run: folder may not exist yet
    re_path = os.path.join(out_dir, os.path.splitext(file_name)[0] + '_semeval.txt')
    print(re_path, '\n-----------')

    report_parts = [
        _SECTION_BREAK, 'classification_report\n', report,
        _SECTION_BREAK, mcc_report,
        _SECTION_BREAK, acc_report,
        _SECTION_BREAK, pre_report,
        _SECTION_BREAK, 'Over All\n',
    ]
    # First value returned by Evaluator.evaluate(): one entry per key.
    for metric_name in results:
        report_parts += [metric_name, '\n', str(results[metric_name]), '\n']

    report_parts += [_SECTION_BREAK, 'Each Labels\n\n']
    # Second value returned by Evaluator.evaluate(): a nested mapping,
    # presumably entity type -> metric name -> scores (confirm in ner_eval).
    for entity in results_agg:
        report_parts += [entity, '\n']
        for metric_name in results_agg[entity]:
            report_parts += [metric_name, '\n',
                             str(results_agg[entity][metric_name]), '\n']
        report_parts.append('\n\n')

    # The context manager closes the file; no explicit close() is needed.
    with open(re_path, 'w', encoding='utf-8') as file:
        file.writelines(report_parts)

print("-------------FINISH------------")

/home/crimex/CRIMEX/predicted_bert/xlmr_std_sent.xlsx
/home/crimex/CRIMEX/predicted_bert/semeval_result/xlmr_std_sent_semeval.txt 
-----------
/home/crimex/CRIMEX/predicted_bert/final_sent_bilstm.xlsx
/home/crimex/CRIMEX/predicted_bert/semeval_result/final_sent_bilstm_semeval.txt 
-----------
/home/crimex/CRIMEX/predicted_bert/wang_last.xlsx
/home/crimex/CRIMEX/predicted_bert/semeval_result/wang_last_semeval.txt 
-----------
/home/crimex/CRIMEX/predicted_bert/xlmr_last.xlsx
/home/crimex/CRIMEX/predicted_bert/semeval_result/xlmr_last_semeval.txt 
-----------
/home/crimex/CRIMEX/predicted_bert/final_sent_crf.xlsx
/home/crimex/CRIMEX/predicted_bert/semeval_result/final_sent_crf_semeval.txt 
-----------
/home/crimex/CRIMEX/predicted_bert/std_chunk.xlsx
/home/crimex/CRIMEX/predicted_bert/semeval_result/std_chunk_semeval.txt 
-----------
-------------FINISH------------


In [7]:
# Standalone cell that prints only the completion banner.
print("-------------FINISH------------")  

-------------FINISH------------
