# Scalarizing results analysis

In [1]:
import mlflow
import pandas as pd
import numpy as np

In [133]:
# Let pandas display up to 150 rows before truncating printed frames/series.
pd.set_option('display.max_rows', 150)

In [2]:
# MLflow experiment IDs handed to mlflow.search_runs by the loading cells of
# this notebook (one for the scalarizing runs, one for the baseline runs).
SCALARIZING_EXPERIMENT_ID = '1'
BASELINE_EXPERIMENT_ID = '2'

## Load baseline

In [180]:
# Load the runs of the baseline experiment from MLflow as a pandas DataFrame.
baseline_df = mlflow.search_runs(BASELINE_EXPERIMENT_ID, output_format='pandas')

In [181]:
# Keep only baseline runs with a non-null `metrics.kne_test_acc`, then derive
# `dataset` from `params.train_path`: take the last '/'-separated component and
# keep whatever precedes the first '-train'.
baseline_df = (
    baseline_df
    .query("not `metrics.kne_test_acc`.isna()")
    .assign(
        dataset=lambda runs: (
            runs['params.train_path']
            .str.split('/').str[-1]
            .str.split('-train').str[0]
        )
    )
)

## Load scalarizing

In [220]:
# Count the rows of scalarizing_df per value of `params.ensemble_size`.
# NOTE(review): scalarizing_df is only assigned by the cells that follow this
# one, so on a fresh kernel this cell cannot run in notebook order — it
# probably belongs after the loading/cleaning cells; confirm and move.
scalarizing_df['params.ensemble_size'].value_counts()

5     4874
10    4863
20    3875
Name: params.ensemble_size, dtype: int64

In [170]:
# Load the runs of the scalarizing experiment from MLflow as a pandas DataFrame.
scalarizing_df = mlflow.search_runs(SCALARIZING_EXPERIMENT_ID, output_format='pandas')

In [179]:
# Clean the scalarizing runs. This cell rebinds `scalarizing_df` to its own
# output, so every step has to tolerate being applied a second time when the
# cell is re-executed without reloading the runs first.
scalarizing_df = (
    scalarizing_df
    # Only runs whose MLflow status is FINISHED take part in the analysis.
    .query("status == 'FINISHED'")
    # Drop columns that carry no value for any remaining run.
    .dropna(axis=1, how='all')
    # errors='ignore': on a re-run (or if the previous step already removed it)
    # the column is absent, and an unconditional drop would raise KeyError.
    .drop(columns='params.train_and_test_paths', errors='ignore')
    # A missing `metrics.accuracy_ensemble_selected` is replaced by 0.0 so the
    # row-wise dropna below does not discard the run because of that column.
    .assign(**{
        "metrics.accuracy_ensemble_selected":
            lambda runs: runs['metrics.accuracy_ensemble_selected'].fillna(0.0)
    })
    # Any run still missing a value in any column is discarded.
    .dropna()
    # Dataset name = text of `params.dataset` before its first '-'.
    .assign(dataset=lambda runs: runs['params.dataset'].str.split('-').str[0])
)

## Join

In [182]:
# Inner-join every scalarizing run with the baseline runs that share its
# dataset and bagging size; baseline columns whose names clash with scalarizing
# columns receive the "_base" suffix.
analysis_df = scalarizing_df.merge(
    baseline_df,
    on=['dataset', 'params.bagging_size'],
    how='inner',
    suffixes=("", "_base"),
)

In [200]:
# Mean of each accuracy column per (scoring method, dataset), with the
# 'metrics.' prefix stripped from the column names. Three derived columns
# follow: the row-wise best mean, whether the selected ensemble attains it, and
# the gap between the selected ensemble and that best value.
mean_accuracies = (
    analysis_df
    .groupby(['params.scoring_method', 'dataset'])[[
        'metrics.selected_ensemble_accuracy',
        'metrics.kne_test_acc',
        'metrics.bagging_test_acc',
        'metrics.ola_test_acc',
        'metrics.mcb_test_acc',
        'metrics.desp_test_acc',
        'metrics.knorau_test_acc',
    ]]
    .mean()
    .rename(columns=lambda column: column.replace('metrics.', ''))
    .assign(best=lambda means: means.max(axis=1))
    .assign(
        is_method_best=lambda means:
            means['selected_ensemble_accuracy'] == means['best']
    )
    .assign(
        difference_method_to_best=lambda means:
            means['selected_ensemble_accuracy'] - means['best']
    )
)

In [214]:
# Write the mean-accuracy table to 'results.csv' (path relative to the kernel's working directory).
mean_accuracies.to_csv('results.csv')

In [196]:
def count_better_than(series):
    """Count, for every entry of ``series``, how many entries are strictly greater.

    The result is a zero-based rank in which 0 means "no other value beats this
    one"; tied values share the same count. Comparisons involving NaN are
    False, so a NaN entry gets 0 and is never counted as greater than anything.

    The computation works on the underlying values only, so it does not depend
    on the index labels. The previous implementation mixed positions from
    ``enumerate`` with label-based indexing, which enlarged the Series on a
    non-default integer index and relied on deprecated positional assignment
    for string-labelled rows.

    Parameters
    ----------
    series : pandas.Series
        Values to rank (e.g. one row of a DataFrame passed by ``apply(axis=1)``).
        It is not modified.

    Returns
    -------
    pandas.Series
        Counts with the same index and name as ``series``. Float input yields
        float counts (matching the dtype the input had); other input yields
        integer counts.
    """
    values = series.to_numpy()
    # Pairwise matrix: entry [i, j] is True when values[j] > values[i]; summing
    # over j gives the number of entries that beat entry i.
    better_counts = (values[np.newaxis, :] > values[:, np.newaxis]).sum(axis=1)

    ranks = pd.Series(better_counts, index=series.index, name=series.name)
    if pd.api.types.is_float_dtype(series.dtype):
        # Keep the dtype callers previously got back for float rows.
        ranks = ranks.astype(series.dtype)
    return ranks

In [195]:
# Per (scoring method, dataset): average each accuracy column, strip the
# 'metrics.' prefix, then replace every row by the output of count_better_than
# applied to that row. `method_best` flags the rows where the selected
# ensemble's resulting value is 0.
datasets_ranking = (
    analysis_df
    .groupby(['params.scoring_method', 'dataset'])[[
        'metrics.selected_ensemble_accuracy',
        'metrics.kne_test_acc',
        'metrics.bagging_test_acc',
        'metrics.ola_test_acc',
        'metrics.mcb_test_acc',
        'metrics.desp_test_acc',
        'metrics.knorau_test_acc',
    ]]
    .mean()
    .rename(columns=lambda column: column.replace('metrics.', ''))
    .apply(count_better_than, axis=1)
    .assign(method_best=lambda ranks: ranks['selected_ensemble_accuracy'] == 0)
)

In [197]:
# Mean of the selected ensemble's ranking value across datasets, per scoring method.
datasets_ranking['selected_ensemble_accuracy'].groupby(level='params.scoring_method').mean()

params.scoring_method
diversity    3.125
normal       3.125
Name: selected_ensemble_accuracy, dtype: float64

In [203]:
# NOTE(review): this only evaluates the `.str.join` attribute of a throwaway
# Series without calling it or storing the result — looks like a leftover
# scratch cell; confirm it can be deleted.
pd.Series(['a', 'b', 'c']).str.join

<pandas.core.strings.accessor.StringMethods at 0x2b20871dbf10>

In [215]:
# Write the ranking table to 'rankings.csv' (path relative to the kernel's working directory).
datasets_ranking.to_csv('rankings.csv')

In [None]:
# Average all columns per (scoring method, dataset), then compare
# `metrics.selected_ensemble_accuracy` against
# `metrics.accuracy_selection_accuracy`: strictly better, exactly equal, and
# the signed difference (selected ensemble minus accuracy selection).
# NOTE(review): experiments_for_analysis_df is not assigned by any earlier cell
# of this notebook — confirm where it is supposed to come from.
for_analysis_df = (
    experiments_for_analysis_df
    .groupby(['params.scoring_method', 'dataset'])
    .mean()
    .assign(
        is_method_better=lambda means:
            means['metrics.selected_ensemble_accuracy'] > means['metrics.accuracy_selection_accuracy']
    )
    .assign(
        is_method_equal=lambda means:
            means['metrics.selected_ensemble_accuracy'] == means['metrics.accuracy_selection_accuracy']
    )
    .assign(
        difference=lambda means:
            means['metrics.selected_ensemble_accuracy'] - means['metrics.accuracy_selection_accuracy']
    )
)
    
    

In [None]:
# Number of non-null `is_method_better` entries per value of the first index level.
# NOTE(review): count() tallies non-null entries, not True values — if the goal
# is "in how many groups is the method better", sum() is presumably what was
# meant; confirm.
for_analysis_df['is_method_better'].groupby(level=0).count()

In [None]:
# Display the `difference` column of the aggregated comparison table.
for_analysis_df['difference']

In [None]:
# Same three comparison columns as the aggregated table, computed on the
# un-aggregated rows; the cell displays how often `is_method_better` is
# True versus False.
(
    experiments_for_analysis_df
    .assign(
        is_method_better=lambda runs:
            runs['metrics.selected_ensemble_accuracy'] > runs['metrics.accuracy_selection_accuracy']
    )
    .assign(
        is_method_equal=lambda runs:
            runs['metrics.selected_ensemble_accuracy'] == runs['metrics.accuracy_selection_accuracy']
    )
    .assign(
        difference=lambda runs:
            runs['metrics.selected_ensemble_accuracy'] - runs['metrics.accuracy_selection_accuracy']
    )['is_method_better']
    .value_counts()
)