In [None]:
import os.path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from dbispipeline.analytics import get_results_as_dataframe

def _approach_from_sourcefile(sourcefile):
    """Derive a short approach label from a result's source file path.

    Takes the file's base name without its extension, removes every
    occurrence of 'unique_' and abbreviates 'wide_and_deep' to 'wd'.
    """
    stem = os.path.splitext(os.path.basename(sourcefile))[0]
    return stem.replace('unique_', '').replace('wide_and_deep', 'wd')


# Fetch the stored runs of the 'hit_prediction' table of this project.
results = get_results_as_dataframe(
    project_name='hit-prediction-ismir2020',
    table_name='hit_prediction',
)

# Keep only runs dated after 2020-12-01. The explicit copy turns the filtered
# selection into an independent frame, so the column assignments below do not
# write to a slice of the fetched frame (avoids SettingWithCopyWarning).
results = results[results['date'] > '2020-12-01'].copy()
if results.empty:
    # Without this guard the pd.concat below fails with the far less helpful
    # "No objects to concatenate" (also a ValueError).
    raise ValueError('No hit_prediction results found after 2020-12-01.')

# One DataFrame per run, built from the 'mean' entry of its outcome.
results['mean'] = results['outcome'].apply(lambda v: pd.DataFrame(v['mean']))
results['approach'] = results['sourcefile'].apply(_approach_from_sourcefile)

# For every run, append the absolute value of the row-wise maximum of its
# 'mean' frame to the run's own fields and turn the result into a one-row
# frame.
# NOTE(review): the neg_* columns read below imply that the rows of the
# 'mean' frame are scorer names and its columns are presumably epochs --
# confirm against the stored outcome format. Each metric is maximised
# independently, so the values of one row need not stem from the same column.
best_rows = []
for _, run in results.iterrows():
    best_metrics = run['mean'].max(axis=1).abs()
    combined = pd.concat([run, best_metrics], axis=0)
    best_rows.append(pd.DataFrame(combined).transpose())
best_results = pd.concat(best_rows)

# Readable aliases for the error metrics; the sign was already dropped by
# abs() above. RMSE is the square root of the (absolute) mean squared error.
best_results['mae'] = best_results['neg_mean_absolute_error']
best_results['meae'] = best_results['neg_median_absolute_error']
best_results['rmse'] = best_results['neg_mean_squared_error'].pow(1./2)
best_results = best_results.sort_values(by=['approach'])

In [None]:
from hit_prediction_code import analytics

# Hand the raw outcomes to the project helper together with np.average.
# NOTE(review): judging by the helper's name this averages the splits for
# every epoch -- confirm in hit_prediction_code.analytics.
metric_avg = analytics.aggregate_splits_per_epoch(
    results['outcome'],
    np.average,
)

# Reduce the per-epoch values with np.max and show their absolute values.
best_epoch_metrics = analytics.aggregate_epochs(metric_avg, np.max).abs()
display(best_epoch_metrics)

In [None]:
# Summary table: one row per run, sorted by approach in the cell that builds
# `best_results`.
display(best_results[['date', 'approach', 'mae', 'meae', 'rmse', 'pearsonr', 'spearmanr', 'kendalltau']])

# One bar chart per metric, all with the same layout. Keeping the
# (column, title) pairs in one list replaces five copy-pasted plot statements.
for metric_column, plot_title in [
    ('mae', 'Wide and Deep MAE'),
    ('rmse', 'Wide and Deep RMSE'),
    ('pearsonr', 'Wide and Deep Pearson Correlation'),
    ('spearmanr', 'Wide and Deep Spearman Correlation'),
    ('kendalltau', 'Wide and Deep Kendall Correlation'),
]:
    ax = best_results[['approach', metric_column]].plot.bar(
        x='approach',
        y=metric_column,
        title=plot_title,
        figsize=(12, 6),
    )
    ax.set_ylabel(metric_column)
    # Render the figure right here instead of display()-ing the Axes object,
    # which would only emit the Axes repr text.
    plt.show()