### Data Interpretation

In [1]:
import xarray
import pandas as pd
import os

In [103]:
def get_predictions(events, activations):
    """Get the predictions for a given event (activation) from an NDL activation matrix.
    
    Input:
    ------
    activations - str 
        Path to a .nc file containing an actiation matrix with dimensions event (y-axis) and outcomes (x-axis).
        
    Output:
    -------
    table - dictionary
        Dictionary with event number as keys and a nested dictionary of the prediction and the actual value as values. 
        Example: {event0: {':'ity'}}
    """
    with xarray.open_dataarray(activations) as acts:  
        acts = acts.to_pandas()
    
    # Get max values (row index aka the suffix) for all columns. Convert to dictionary. 
    activations_dict = acts.idxmax()
    activations_dict = activations_dict.to_dict()
    
    events = pd.read_csv(events, sep = '\t', usecols = ['Outcomes']).squeeze("columns")
    predictions = {}
    
    for event, prediction in activations_dict.items():
        entry = 'event'+ str(event)
        predictions[entry] = {'prediction': prediction, 'suffix':events[event]}
    
    return predictions     
    
    
def evaluation(predictions):
    """Get evaluation of predictions.
    Percentage: Dividing the number of correct predictions by the total number of predictions.
    
    Input:
    -----
    predictions - dictionary
        Dictionary with events as keys and a nested dictionary of the prediction and the actual value as values.
        
    Output:
    ------
    
    """
    truePos = 0
    falsePos = 0
    falseNeg = 0
    correctPred = 0

    for item in predictions.items():
        if item[1]['prediction'] == 'ity' and item[1]['suffix'] == 'ity':
            truePos += 1
        elif item[1]['prediction'] == 'ity' and item[1]['suffix'] == 'ness':
            falsePos += 1
        elif item[1]['prediction'] == 'ness' and item[1]['suffix'] == 'ity':
            falseNeg += 1
            
    for item in predictions.items():
        if item[1]['prediction'] == 'ity' and item[1]['suffix'] == 'ity' or item[1]['prediction'] == 'ness' and item[1]['suffix'] == 'ness':
            correctPred += 1
    
    precision = truePos / (truePos + falsePos)
    recall = truePos / (truePos + falseNeg)
    
    f1 = 2 * (precision * recall) / (precision + recall)
    percentage = correctPred / len(predictions)
    
    return f1, str(percentage) + '%'

In [104]:
# F1 score and correct prediction percentage for Model 1
predictions = get_predictions(events = '../data/m1_syllable.tsv', activations = "../data/m1_activations.nc")
evaluation(predictions)

(0.78300803673938, '0.6648936170212766%')

In [105]:
# F1 score and correct prediction percentage for Model 2
predictions = get_predictions(events = '../data/m2_separate.tsv', activations = "../data/m2_activations.nc")
evaluation(predictions)

(0.8746594005449592, '0.8368794326241135%')

In [106]:
# F1 score and correct prediction percentage for Model 3
predictions = get_predictions(events = '../data/m3_separate_filtered.tsv', activations = "../data/m3_activations.nc")
evaluation(predictions)

(0.8778523489932886, '0.8386524822695035%')