In [None]:
import copy
import os.path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from dbispipeline.analytics import get_results_as_dataframe
from mediaeval2020.analytics import extract_best_epoch
from mediaeval2020.analytics import extract_best_outcome
from mediaeval2020.analytics import extract_final_outcome
from mediaeval2020.analytics import extract_metrics
from mediaeval2020.analytics import plot_per_label
from mediaeval2020.analytics import print_per_label


# Load every stored run of the project; the cells below reuse this frame.
results = get_results_as_dataframe(project_name='mediaeval2020')

# Inspect a single run (id 484). `.iloc[0]` raises IndexError when no run has
# that id. The outcome is deep-copied so that pruning keys below leaves the
# outcome stored in `results` untouched.
row = results[results['id'] == 484].iloc[0]
outcome = copy.deepcopy(row['outcome'])

# Remove the bulky entries (confusion matrices and '*_all' values) from each
# nested mapping so the printed outcome stays readable.
for metrics in outcome.values():
    # Snapshot the keys first: the mapping is modified while it is scanned.
    for key in list(metrics):
        # 'all' has to match a whole '_'-separated token. A plain substring
        # test ('all' in key) would also discard every 'recall*' key, because
        # 'recall' contains 'all'.
        if 'all' in key.split('_') or 'confusion_matrix' in key:
            del metrics[key]

print(outcome)

In [None]:
# Metric handed to extract_best_outcome / extract_best_epoch below.
# NOTE(review): presumably the metric by which the "best" epoch is chosen --
# confirm against mediaeval2020.analytics.
metric = 'f1_macro'

# Derive one summary column per extractor from the raw outcome of every run.
# Series.apply forwards extra keyword arguments to the applied function, so no
# wrapper lambda is needed.
results['final_outcome'] = results['outcome'].apply(extract_final_outcome)
results['best_outcome'] = results['outcome'].apply(extract_best_outcome, metric=metric)
results['best_epoch'] = results['outcome'].apply(extract_best_epoch, metric=metric)

def display_outcome(col):
    """Show a grouped bar chart of summary metrics for every run in ``results``.

    Reads the module-level ``results`` frame, which must provide the columns
    ``id``, ``sourcefile`` and ``col``. Each bar group is labelled
    ``"<id> <sourcefile>"`` and the chart is titled with ``col``.

    Args:
        col: Name of the ``results`` column that holds one outcome mapping per
            run (for example ``'final_outcome'`` or ``'best_outcome'``).

    Raises:
        KeyError: If one of the plotted metrics is missing from every outcome.
    """
    # Entries left out of the table; none of them is plotted below.
    # NOTE(review): presumably these hold large per-label arrays/matrices --
    # confirm against the evaluator that produces the outcome.
    dropped = ('confusion_matrix', 'average_precision_all', 'roc_auc_all')
    plotted = ['f1_micro', 'f1_macro', 'average_precision', 'precision_micro', 'precision_macro']

    records = []
    for run_id, run_outcome in zip(results['id'], results[col]):
        # Build a filtered shallow dict rather than deep-copying the whole
        # outcome and deleting keys: the stored outcome stays untouched, the
        # dropped entries are never copied, and a run that lacks one of them
        # no longer raises KeyError.
        record = {key: value for key, value in run_outcome.items() if key not in dropped}
        record['id'] = run_id
        records.append(record)

    # Attach the source file of each run so it can be part of the bar label.
    outcome = pd.DataFrame(records).merge(results[['id', 'sourcefile']], on=['id'])
    outcome['name'] = outcome['id'].apply(str) + ' ' + outcome['sourcefile']
    outcome[['name'] + plotted].plot.bar(x='name', title=col, figsize=(12, 6))
    plt.show()

# Order the runs by id so the tables and bar charts below list them in the
# same order.
results = results.sort_values(by='id')

# Best epoch found for every run.
display(results.loc[:, ['id', 'best_epoch']])

# One bar chart per outcome flavour: the final outcome first, then the best.
for outcome_column in ('final_outcome', 'best_outcome'):
    display_outcome(outcome_column)

# Flatten the best outcome of every run into a table, one row per run.
display(pd.json_normalize(results['best_outcome']))

In [None]:
# Per-label view of a single run (id 502); `.iloc[0]` raises IndexError when
# no run has that id.
row = results.loc[results['id'] == 502].iloc[0]
data = extract_metrics(row)

# Print the per-label metrics ordered by their 'pr-auc' value.
print_per_label(data.sort_values(by='pr-auc'))
plt.show()

# Mean of the extracted metrics.
# NOTE(review): assumes every column of `data` is numeric -- confirm, since
# recent pandas versions raise on non-numeric columns in mean().
display(data.mean())