# BERT EVALUATION

### IMPORTS, VARIABLES

In [1]:
import pandas as pd

In [None]:
# Load the annotated error spreadsheets, one per annotation layer
# (lemmas, XPOS, UPOS). The paths are relative, so they resolve against the
# current working directory. Each table is later passed to get_error_stats,
# which reads its 'Error Type' column.
lemma_errors = pd.read_excel('../data/results/all_lemmas_annotated.xlsx')
xpos_errors = pd.read_excel('../data/results/all_xpos_annotated.xlsx')
upos_errors = pd.read_excel('../data/results/all_upos_annotated.xlsx')

### FUNCTIONS

In [None]:
def get_error_stats(errors: pd.DataFrame) -> pd.DataFrame:
    '''Compute raw and relative frequencies of the manually annotated error types.

    Args:
        errors: a Pandas DataFrame (e.g. read from an annotated .xlsx file)
            that contains an 'Error Type' column.

    Returns:
        A DataFrame indexed by error type (index name 'error') with two columns:
        'raw' (number of rows with that error type) and 'relative' (that count
        divided by the total number of rows in ``errors``). Rows are ordered
        from the most to the least frequent error type. Rows whose
        'Error Type' is missing (NaN) are counted as their own category, so
        the 'relative' column always sums to 1 for non-empty input.

    Raises:
        KeyError: if ``errors`` has no 'Error Type' column.
    '''
    # Count every category once. dropna=False keeps un-annotated rows visible
    # instead of failing on them: the default value_counts() drops NaN, so
    # looking NaN up in its result raises KeyError.
    # value_counts() already returns the counts in descending order.
    counts = errors['Error Type'].value_counts(dropna=False)

    stats_pd = counts.rename('raw').to_frame().rename_axis('error')
    # An empty input yields an empty 'raw' column, so no division by zero occurs.
    stats_pd['relative'] = stats_pd['raw'] / len(errors)

    return stats_pd

### EXECUTION 

In [None]:
# Error-type frequencies for the lemma annotations; as the cell's last
# expression, the returned DataFrame is what the notebook displays.
get_error_stats(lemma_errors)

In [None]:
# Error-type frequencies for the XPOS annotations; as the cell's last
# expression, the returned DataFrame is what the notebook displays.
get_error_stats(xpos_errors)

In [None]:
# Error-type frequencies for the UPOS annotations; as the cell's last
# expression, the returned DataFrame is what the notebook displays.
get_error_stats(upos_errors)