In [None]:
import os
import pandas as pd
import numpy as np

# Number of evaluation bases to aggregate. The cells below iterate over base
# numbers 1..BASE_COUNT and read
# 'datasets/base_<n>_evaluation_result_cv.json' for each of them.
BASE_COUNT = 1

In [None]:
# Average the cross-validation scores of every algorithm over all bases.
#
# Each base file is loaded into a DataFrame with one row per algorithm. The
# metric scores are summed per row index, together with the number of bases the
# row appeared in ('Count'), and are finally divided by that count, so an
# algorithm that is missing from some base is averaged over the bases it has.
#
# NOTE(review): the 'ROC' column is declared but never accumulated in this
# cell, so it stays at 0.0 — confirm whether the input files carry a ROC score.
metric_columns = ['Accuracy', 'F1', 'Precision', 'Recall']

sum_df_cv = pd.DataFrame(columns=['Name', 'Count', 'Accuracy', 'F1', 'Precision', 'Recall', 'ROC', 'Average'])

for base_num in range(1, BASE_COUNT + 1):
    other_df_cv = pd.read_json(
        os.path.join('datasets', 'base_{}_evaluation_result_cv.json'.format(base_num)),
    )

    # The index should be an integer. sort_index() returns a new frame, so the
    # result must be assigned back for the ordering to take effect.
    other_df_cv.index = other_df_cv.index.map(int)
    other_df_cv = other_df_cv.sort_index()

    for index, row in other_df_cv.iterrows():
        # The first column is read by position (presumably the algorithm name;
        # verify against the JSON schema). .iloc avoids the deprecated
        # positional fallback of row[0] on a label-indexed Series.
        name = row.iloc[0]

        if index not in sum_df_cv.index:
            sum_df_cv.loc[index] = [name, 0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]

        # Validation: the same index must carry the same name in every base;
        # otherwise the row is flagged and stays flagged.
        if sum_df_cv.loc[index, 'Name'] != name:
            sum_df_cv.loc[index, 'Name'] = 'Invalid'

        # Sum
        sum_df_cv.loc[index, 'Count'] += 1
        for metric in metric_columns:
            sum_df_cv.loc[index, metric] += row[metric]

# Turn the sums into means, rounded to 4 decimal places. The frame was built
# row by row, so its columns are object-typed and are cast before dividing.
counts = sum_df_cv['Count'].astype(float)
for metric in metric_columns:
    sum_df_cv[metric] = (sum_df_cv[metric].astype(float) / counts).round(4)

# Overall score: mean of the four averaged metrics.
sum_df_cv['Average'] = sum_df_cv[metric_columns].mean(axis=1).round(4)

In [None]:
# Show the 25 best algorithms, ranked by their 'Average' score (highest first).

(
    sum_df_cv
    .sort_values(by='Average', ascending=False)
    .loc[:, ['Name', 'Accuracy', 'Precision', 'Recall', 'F1', 'Average']]
    .head(25)
)

In [None]:
# Standard deviation and other aggregates on F1.
#
# Builds one row per algorithm with its F1 score in every base (one column per
# base, named '1', '2', ...) followed by row-wise aggregates over those columns.

# Make an array like ['1', '2', ...] for every base.
base_columns = [str(x) for x in range(1, BASE_COUNT + 1)]

# Make an array of aggregates (human readable, function name).
aggrs = [('Min', 'min'), ('Max', 'max'), ('Average', 'mean'), ('Std', 'std')]

# Export the human readable names of aggregates.
aggrs_names = [x[0] for x in aggrs]

# Make a dataframe
sum_f1_df_cv = pd.DataFrame(columns=['Name'] + base_columns + aggrs_names)

for base_num in range(1, BASE_COUNT + 1):
    # Read the base.
    other_f1_df_cv = pd.read_json(
        os.path.join('datasets', 'base_{}_evaluation_result_cv.json'.format(base_num)),
    )

    # The index should be an integer. sort_index() returns a new frame, so the
    # result must be assigned back for the ordering to take effect.
    other_f1_df_cv.index = other_f1_df_cv.index.map(int)
    other_f1_df_cv = other_f1_df_cv.sort_index()

    # Iterate over every row (algorithm) in the base.
    for index, row in other_f1_df_cv.iterrows():
        # The first column is read by position (presumably the algorithm name;
        # verify against the JSON schema). .iloc avoids the deprecated
        # positional fallback of row[0] on a label-indexed Series.
        name = row.iloc[0]

        # Add the algorithm to the new DataFrame if it doesn't exist yet.
        # Scores start as NaN rather than 0.0 so that a base the algorithm is
        # missing from is skipped by the aggregates instead of counting as 0.
        if index not in sum_f1_df_cv.index:
            sum_f1_df_cv.loc[index] = [name] + [np.nan] * (len(base_columns) + len(aggrs))

        # Validation: the same index must carry the same name in every base;
        # otherwise the row is flagged and stays flagged.
        if sum_f1_df_cv.loc[index, 'Name'] != name:
            sum_f1_df_cv.loc[index, 'Name'] = 'Invalid'

        # F1 score for this algorithm and base.
        sum_f1_df_cv.loc[index, str(base_num)] = row['F1']

# Round all scores to 4 decimal places. The frame was built row by row, so its
# columns are object-typed and are cast to float first.
for base_column in base_columns:
    sum_f1_df_cv[base_column] = sum_f1_df_cv[base_column].astype(float).round(4)

# Aggregate using every function in 'aggrs' and round to 4 decimal places.
# Note: pandas' std() is the sample standard deviation (ddof=1), so 'Std' is
# NaN for any row with fewer than two scores, e.g. whenever BASE_COUNT is 1.
for label, method_name in aggrs:
    sum_f1_df_cv[label] = getattr(sum_f1_df_cv[base_columns], method_name)(axis=1).round(4)

In [None]:
# Show the per-base F1 scores and their aggregates for the 25 algorithms with
# the highest 'Average' F1.

(
    sum_f1_df_cv
    .sort_values(by='Average', ascending=False)
    .loc[:, ['Name', *base_columns, *aggrs_names]]
    .head(25)
)