In [4]:
from statistics import mean, stdev
from sklearn.metrics import r2_score, mean_squared_error

import import_ipynb
import numpy as np
import matplotlib.pyplot as plt

from fetch_data import fewer_plot_names
#from bias_inspection import detect_bias

In [None]:
def create_dict(y_pred, y_true, ids):
    """Group per-sample predictions by session id and goal id.

    Parameters
    ----------
    y_pred : sequence
        One prediction per sample.
    y_true : sequence
        One ground-truth entry per sample, aligned with ``y_pred``.
    ids : dict
        Must provide ``ids['ids']`` (session ids) and ``ids['goal_ids']``
        (goal ids, indexed like ``y_pred``). A goal is attributed to a
        session when its id starts with the session id.

    Returns
    -------
    dict
        ``{session_id: {goal_id: {'y_pred': [...], 'y_true': ...}}}``.
        ``'y_pred'`` holds the predictions of all samples carrying that goal
        id, in sample order; ``'y_true'`` is the ground truth of the last
        such sample, or ``-1`` when no sample within ``len(y_pred)`` carries
        that goal id.
    """
    goal_ids = ids['goal_ids']

    # Index the samples by goal id once, instead of rescanning every sample
    # for every (session, goal) pair.
    positions_by_goal = {}
    for pos in range(len(y_pred)):
        positions_by_goal.setdefault(goal_ids[pos], []).append(pos)

    # Ordered de-duplication keeps the first-seen insertion order.
    unique_goal_ids = list(dict.fromkeys(goal_ids))

    data_dict = {}
    for s_id in ids['ids']:
        if s_id in data_dict:
            continue
        session_goals = {}
        data_dict[s_id] = session_goals
        for g_id in unique_goal_ids:
            if not g_id.startswith(s_id):
                continue
            positions = positions_by_goal.get(g_id, [])
            session_goals[g_id] = {
                'y_pred': [y_pred[pos] for pos in positions],
                # The last matching sample wins for the ground truth.
                'y_true': y_true[positions[-1]] if positions else -1,
            }

    return data_dict

In [None]:
def get_macro_scores(data):
    """Compute session-averaged (macro) accuracy and convergence point.

    Parameters
    ----------
    data : dict
        ``{session_id: {goal_id: {'y_pred': [...], 'y_true': ...}}}``.
        Each goal entry is updated in place with an ``'accuracy'`` key.

    Returns
    -------
    dict
        ``'mean_accuracy'``: per-goal accuracy averaged within each session,
        then across sessions.
        ``'std_cp'``: normalised convergence point averaged the same way.
        Despite the key name this is a mean, not a standard deviation; the
        key is kept for backward compatibility.

    Notes
    -----
    The convergence point of a goal is the 1-based position of the first
    prediction in the final unbroken run of correct predictions, divided by
    the number of predictions. A goal whose last prediction is wrong never
    converges and scores ``(n + 1) / n``.

    Goals without predictions and sessions without scorable goals are left
    out of the averages. If nothing can be scored, both metrics are ``0.0``.
    """
    accuracy_total = 0
    cp_total = 0
    scored_sessions = 0

    for s_id in data:
        accuracies = []
        cp_sum = 0
        for g_id in data[s_id]:
            goal = data[s_id][g_id]
            n_preds = len(goal['y_pred'])
            goal['accuracy'] = 0
            if n_preds == 0:
                # Nothing to score; previously this divided by zero.
                continue

            true_class = np.argmax(goal['y_true'])
            cp = -1
            for y_idx, pred in enumerate(goal['y_pred']):
                if np.argmax(pred) == true_class:
                    goal['accuracy'] += 1
                    # Only the start of a correct streak is recorded. Setting
                    # cp on every hit would always end at n_preds.
                    if cp == -1:
                        cp = y_idx + 1
                else:
                    # A miss breaks the streak.
                    cp = -1
            if cp == -1:
                cp = n_preds + 1

            cp_sum += cp / n_preds
            goal['accuracy'] /= n_preds
            accuracies.append(goal['accuracy'])

        if not accuracies:
            # statistics.mean raises on an empty list.
            continue
        cp_total += cp_sum / len(accuracies)
        accuracy_total += mean(accuracies)
        scored_sessions += 1

    if scored_sessions == 0:
        return {'mean_accuracy': 0.0, 'std_cp': 0.0}

    return {'mean_accuracy': accuracy_total / scored_sessions,
            'std_cp': cp_total / scored_sessions}

In [None]:
def get_micro_scores(y_pred, y_true, joint_ids):
    """Compute sample-level (micro) accuracy and compare ratings by outcome.

    Parameters
    ----------
    y_pred : sequence
        One score vector per sample; the predicted class is its argmax.
    y_true : sequence
        One target vector per sample; the true class is its argmax.
    joint_ids : dict
        ``joint_ids['prev_responses'][i]['rating']`` must be convertible to
        ``float``. A value of ``-1`` is excluded from the statistics
        (presumably a "no rating" sentinel -- confirm against the data
        loader).

    Returns
    -------
    tuple
        ``({'accuracy': float}, ratings)`` where ``ratings`` is
        ``{'correct': [...], 'incorrect': [...]}``, holding the ratings of
        correctly and incorrectly predicted samples. Accuracy is ``0.0``
        when ``y_pred`` is empty.

    Side effects
    ------------
    Prints the mean and standard deviation of both rating groups and shows a
    box plot of them.
    """
    def _mean_and_stdev(values):
        # statistics.mean needs at least one value and statistics.stdev at
        # least two. Report NaN instead of raising StatisticsError when one
        # group is (nearly) empty, e.g. when every prediction is correct.
        avg = mean(values) if len(values) >= 1 else float('nan')
        spread = stdev(values) if len(values) >= 2 else float('nan')
        return avg, spread

    accuracy = 0
    ratings = {'correct': [], 'incorrect': []}
    for y_idx in range(len(y_pred)):
        is_correct = np.argmax(y_pred[y_idx]) == np.argmax(y_true[y_idx])
        if is_correct:
            accuracy += 1
        rating = float(joint_ids['prev_responses'][y_idx]['rating'])
        if rating != -1:
            ratings['correct' if is_correct else 'incorrect'].append(rating)

    pos_avg, pos_std = _mean_and_stdev(ratings['correct'])
    neg_avg, neg_std = _mean_and_stdev(ratings['incorrect'])
    print('pos_rating_avg: ', pos_avg, pos_std,
          '\tneg_rating_avg: ', neg_avg, neg_std)

    fig, ax = plt.subplots()
    # Pass real lists rather than dict views to matplotlib.
    ax.boxplot(list(ratings.values()))
    ax.set_xticklabels(list(ratings.keys()))

    plt.show()

    n_samples = len(y_pred)
    return {'accuracy': accuracy / n_samples if n_samples else 0.0}, ratings

In [None]:
def post_process_results(y_pred, prev_goals):
    """Suppress predictions of goals that already appear earlier in ``prev_goals``.

    For each sample, the top-scoring class is set to 0 while its name (looked
    up in ``fewer_plot_names``) occurs in ``prev_goals`` at a position other
    than the last one. The most recent entry of ``prev_goals`` is allowed to
    remain the prediction.

    Parameters
    ----------
    y_pred : sequence of mutable score vectors
        Modified in place.
    prev_goals : list
        Goal names; must support ``in`` and ``.index``.
        NOTE(review): the same list is applied to every sample -- confirm
        that is intended by the caller.

    Returns
    -------
    The same ``y_pred`` object, after in-place modification.
    """
    for idx in range(len(y_pred)):
        # Each pass suppresses at most one class, so the number of classes
        # bounds the work. This also stops the loop if zeroing an entry does
        # not change the argmax (e.g. all remaining scores are <= 0).
        for _ in range(len(y_pred[idx])):
            top = np.argmax(y_pred[idx])
            predicted_goal = fewer_plot_names[top]
            if predicted_goal not in prev_goals:
                break
            if prev_goals.index(predicted_goal) == len(prev_goals) - 1:
                # The most recent goal is an acceptable prediction. The
                # original `while` loop never terminated in this case.
                break
            y_pred[idx][top] = 0

    return y_pred

In [None]:
def performance_test_CV(data, y_pred, y_true, joint_ids, set_name, seq_length=20, 
                        extension="", inspect_bias=True):
    """Post-process predictions and collect macro and micro goal metrics.

    The predictions are first filtered with ``post_process_results`` using
    ``joint_ids['prev_goals']``, then scored per sample
    (``get_micro_scores``) and per session/goal (``create_dict`` followed by
    ``get_macro_scores``).

    ``data``, ``seq_length``, ``extension`` and ``inspect_bias`` are accepted
    but not used in this function body.

    Returns
    -------
    ``(result_metrics, ratings)`` when ``set_name == 'TEST'``, otherwise only
    ``result_metrics``, a dict with the keys ``'goals_macro'`` and
    ``'goals_micro'``.
    """
    filtered_pred = post_process_results(y_pred, joint_ids['prev_goals'])
    per_goal = create_dict(filtered_pred, y_true, joint_ids)
    micro, ratings = get_micro_scores(filtered_pred, y_true, joint_ids)
    macro = get_macro_scores(per_goal)

    result_metrics = {'goals_macro': macro, 'goals_micro': micro}

    # Only the test split also hands back the rating breakdown.
    return (result_metrics, ratings) if set_name == 'TEST' else result_metrics