In [1]:
import xarray
import pandas as pd
import os

In [119]:
def get_predictions(events, activations):
    """Get the predictions for a given event (activation) from an NDL activation matrix.
    
    Input:
    ------
    activations - str 
        Path to a .nc file containing an actiation matrix with dimensions event (y-axis) and outcomes (x-axis).
        
    Output:
    -------
    table - dictionary
        Dictionary with event number as keys and a nested dictionary of the prediction and the actual value as values. 
        Example: {event0: {':'ity'}}
    """
    with xarray.open_dataarray(activations) as acts:  
        acts = acts.to_pandas()
    
    # Get max values (row index aka the suffix) for all columns. Convert to dictionary. 
    activations_dict = acts.idxmax()
    activations_dict = activations_dict.to_dict()
    
    events = pd.read_csv(events, sep = '\t', usecols = ['Outcomes']).squeeze("columns")
    predictions = {}
    
    for event, prediction in activations_dict.items():
        entry = 'event'+ str(event)
        predictions[entry] = {'prediction': prediction, 'suffix':events[event]}
    
    return predictions     
    
    
def evaluation(predictions):
    """Get evaluation of predictions.
    Percentage: Dividing the number of correct predictions by the total number of predictions.
    
    Input:
    -----
    predictions - dictionary
        Dictionary with events as keys and a nested dictionary of the prediction and the actual value as values.
        
    Output:
    ------
    
    """
    truePos_ity = 0
    falsePos_ity = 0
    falseNeg_ity = 0
    truePos_ness = 0
    falsePos_ness = 0
    falseNeg_ness = 0
    correctPred = 0

    for item in predictions.items():
        if item[1]['prediction'] == 'ity' and item[1]['suffix'] == 'ity':
            truePos_ity += 1
        elif item[1]['prediction'] == 'ity' and item[1]['suffix'] == 'ness':
            falsePos_ity += 1
        elif item[1]['prediction'] == 'ness' and item[1]['suffix'] == 'ity':
            falseNeg_ity += 1
    
    
    for item in predictions.items():
        if item[1]['prediction'] == 'ness' and item[1]['suffix'] == 'ness':
            truePos_ness += 1
        elif item[1]['prediction'] == 'ness' and item[1]['suffix'] == 'ity':
            falsePos_ness += 1
        elif item[1]['prediction'] == 'ity' and item[1]['suffix'] == 'ness':
            falseNeg_ness += 1
            
    for item in predictions.items():
        if item[1]['prediction'] == 'ity' and item[1]['suffix'] == 'ity' or item[1]['prediction'] == 'ness' and item[1]['suffix'] == 'ness':
            correctPred += 1
    
    precision_ity = truePos_ity / (truePos_ity + falsePos_ity)
    recall_ity = truePos_ity / (truePos_ity + falseNeg_ity)
    
    precision_ness = truePos_ness / (truePos_ness + falsePos_ness)
    recall_ness = truePos_ness / (truePos_ness + falseNeg_ness)
    
    f1_ity = 2 * (precision_ity * recall_ity) / (precision_ity + recall_ity)
    f1_ness = 2 * (precision_ness * recall_ness) / (precision_ness + recall_ness)
    
    percentage = correctPred / len(predictions)
    macro = (f1_ity + f1_ness) / 2
    
    return 'f1 for ity: ', f1_ity, 'f1 for ness', f1_ness, 'macro-averaged: ', macro, str(percentage) + '%'

In [120]:
# F1 score and correct prediction percentage for Model 1
predictions = get_predictions(events = '../data/m1_syllable.tsv', activations = "../data/m1_activations.nc")
evaluation(predictions)

('f1 for ity: ',
 0.78300803673938,
 'f1 for ness',
 0.2645914396887159,
 'macro-averaged: ',
 0.523799738214048,
 '0.6648936170212766%')

In [121]:
# F1 score and correct prediction percentage for Model 2
predictions = get_predictions(events = '../data/m2_separate.tsv', activations = "../data/m2_activations.nc")
evaluation(predictions)

('f1 for ity: ',
 0.8746594005449592,
 'f1 for ness',
 0.766497461928934,
 'macro-averaged: ',
 0.8205784312369466,
 '0.8368794326241135%')

In [122]:
# F1 score and correct prediction percentage for Model 3
predictions = get_predictions(events = '../data/m3_separate_filtered.tsv', activations = "../data/m3_activations.nc")
evaluation(predictions)

('f1 for ity: ',
 0.8778523489932886,
 'f1 for ness',
 0.7624020887728461,
 'macro-averaged: ',
 0.8201272188830673,
 '0.8386524822695035%')