In [None]:
import copy
import os.path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from hit_prediction_code.analytics import get_results_as_dataframe
from hit_prediction_code import analytics

# Load the stored experiment runs for the mel-spectrogram FCN binary-class plans.
# NOTE(review): `filter_git_dirty=True` presumably drops runs recorded from an
# uncommitted working tree, and `date_filter` presumably restricts runs to those
# stored after the given timestamp — confirm against
# `hit_prediction_code.analytics`.
results = get_results_as_dataframe(
    project_name='hit-prediction-ismir2020',
    table_name='hit_prediction',
    filter_git_dirty=True,
    date_filter='> \'2021-08-04 16:20:00\'',
    columns=['id', 'sourcefile', 'outcome'],
    # Broader alternative that matches every plan under plans/hspd/:
    # filters=['sourcefile LIKE \'plans/hspd/%%.py\''],
    # NOTE(review): the doubled '%%' looks like an escaped SQL wildcard '%' — confirm.
    filters=['sourcefile LIKE \'plans/hspd/binary_class_%%_melspect%%fcn%%.py\''],
)

# The helpers' return values are ignored, so they presumably extend `results`
# in place. Later cells read a 'mean' column that is not requested above, so it
# is presumably added here — TODO confirm.
analytics.add_approach_to_df(results)
analytics.add_cv_epoch_evaluator_outcome_to_df(results)

# Rebind instead of sorting in place: same ordering, but the cell no longer
# mutates the frame it was handed, matching the sort used further down.
results = results.sort_values(by='sourcefile')
display(len(results))

In [None]:
def compute_acc(cm):
    """Return the mean accuracy over a sequence of 2x2 confusion matrices.

    Each matrix is flattened in row-major order and its four cells are read
    as ``tn, fp, fn, tp``. The accuracy ``(tn + tp) / (tn + fp + fn + tp)``
    is computed per matrix and the unweighted mean of those values is
    returned.

    Args:
        cm: Sized iterable of 2x2 array-likes (anything ``np.array`` turns
            into exactly four elements).

    Returns:
        The average per-matrix accuracy.

    Raises:
        ZeroDivisionError: If ``cm`` is empty.
        ValueError: If a matrix does not flatten to exactly four values.
    """
    running_total = 0
    for label_matrix in cm:
        true_neg, false_pos, false_neg, true_pos = np.array(label_matrix).ravel()
        correct = true_neg + true_pos
        observed = true_neg + false_pos + false_neg + true_pos
        running_total = running_total + correct / observed

    return running_total / len(cm)

def compute_f1(cm, zero_division=0.0):
    """Return the macro-averaged F1 score over 2x2 confusion matrices.

    Each matrix is flattened in row-major order and its four cells are read
    as ``tn, fp, fn, tp``. The per-matrix score is
    ``2 * tp / (2 * tp + fp + fn)`` and the unweighted mean of those scores
    is returned.

    A matrix with ``tp == fp == fn == 0`` has no defined F1. Previously the
    numpy division ``0 / 0`` emitted a RuntimeWarning and produced NaN, which
    turned the whole average into NaN. Such matrices now contribute
    ``zero_division`` instead.

    Args:
        cm: Sized iterable of 2x2 array-likes (anything ``np.array`` turns
            into exactly four elements).
        zero_division: Score used for a matrix whose F1 denominator is zero.
            Defaults to ``0.0``; pass ``float('nan')`` to get the old NaN
            result without the warning.

    Returns:
        The average per-matrix F1 score.

    Raises:
        ZeroDivisionError: If ``cm`` is empty.
        ValueError: If a matrix does not flatten to exactly four values.
    """
    def cm_f1(matrix):
        # True negatives do not take part in the F1 score.
        _, fp, fn, tp = np.array(matrix).ravel()
        denominator = 2 * tp + fp + fn
        if denominator == 0:
            # Nothing positive was present or predicted: F1 is undefined.
            return zero_division

        return (2 * tp) / denominator

    f1 = 0
    for m in cm:
        f1 += cm_f1(m)

    return f1 / len(cm)

# For every run, print its id, its source file, and the highest accuracy and
# F1 obtained from the 'multilabel_confusion_matrix' row of the run's 'mean'
# table, rounded to three decimals.
# NOTE(review): each element of that row is presumably the list of per-label
# confusion matrices for one epoch — confirm.
for _, row in results.iterrows():
    # Reduce every element of the confusion-matrix row to a single score.
    acc = row['mean'].loc['multilabel_confusion_matrix'].apply(compute_acc)
    f1 = row['mean'].loc['multilabel_confusion_matrix'].apply(compute_f1)
#     print(row['id'], row['sourcefile'], acc)
    print(row['id'], row['sourcefile'], f'acc: {acc.max().round(3)}', f'f1: {f1.max().round(3)}')
#     print(row['mean'].max(axis=1))
#     print(row['mean'].loc['multilabel_confusion_matrix'][-1])

In [None]:
# Store, for every run, the result of calling `.tail()` on its 'mean' entry.
# NOTE(review): `.tail()` keeps the last five rows. Other cells in this
# notebook look up the rows of 'mean' by metric name, so this may select the
# last five metrics rather than the final epoch(s) — confirm the intent.
results['final_outcome'] = results['mean'].apply(lambda v: v.tail())
# results['best_outcome'] = results['mean'].apply(lambda v: v.max(axis=1))

def display_outcome():
    """Plot the best macro F1, precision and recall of every run as bars.

    Reads the notebook-level ``results`` frame. For each run the row-wise
    maximum of its ``mean`` table is taken (presumably the best value over
    all epochs — confirm) and the three macro metrics are picked from it.
    The bars are labelled ``"<id> <sourcefile>"`` and the figure is shown
    immediately; nothing is returned.
    """
    metrics = ['f1_macro', 'precision_macro', 'recall_macro']

    def _best_scores(run):
        # One record per run: its id plus the maximum of each plotted metric.
        record = {'id': run['id']}
        row_maxima = run['mean'].max(axis=1)
        record.update((metric, row_maxima[metric]) for metric in metrics)
        return record

    records = [_best_scores(run) for _, run in results.iterrows()]

    # Bring the source file back in via the run id so it can be used as label.
    best_per_run = pd.DataFrame(records).merge(
        results[['id', 'sourcefile']],
        on=['id'],
    )
    best_per_run['name'] = (
        best_per_run['id'].apply(str) + ' ' + best_per_run['sourcefile']
    )

    plot_columns = ['name'] + metrics
    best_per_run[plot_columns].plot.bar(x='name', title='best', figsize=(24, 6))
    plt.show()

# Order the runs by source file before plotting; `display_outcome` walks
# `results` row by row, so this presumably fixes the order of the bars.
results = results.sort_values(by=['sourcefile'])
# display(results[['id', 'sourcefile', 'best_epoch']])

display_outcome()