In [1]:
from __future__ import print_function, division

import numpy as np
import sys
import os
import json
import pandas as pd


# Sort key that fixes the order of the method rows in the result tables.
def custom_sort(x):
    """Return a sort key that groups method names into three tiers.

    Tier 1: any name containing the substring 'no_clean'.
    Tier 2: the names listed in ``cl_methods``, ordered by their position in
            that tuple (the position is prefixed to the name as a string).
    Tier 3: every other name, ordered alphabetically.

    Args:
        x: Method name.

    Returns:
        A ``(tier, text)`` tuple suitable for use as a ``key`` in sorting.
    """
    cl_methods = ('argmax', 'conf_joint_only', 'cl_pbc', 'cl_pbnr', 'cl_both', 'cl')
    if 'no_clean' in x:
        return (1, x)
    try:
        position = cl_methods.index(x)
    except ValueError:
        # Not one of the listed methods: falls into the last tier.
        return (3, x)
    return (2, str(position) + x)

In [2]:
# Load the results from 'mask_results_20.csv' and reshape them into one
# (method x dataset) table per metric and per seed.
df = pd.read_csv('mask_results_20.csv')
cols_keep = ['dataset', 'method']
metrics = ['accuracy', 'balanced_accuracy', 'f1', 'f1_macro', 'acc_pruned', 'remove_pct']
dfs_acc, dfs_bl_acc, dfs_f1, dfs_f1_macro, dfs_acc_pruned, dfs_remove_pct = {}, {}, {}, {}, {}, {}
# dfs[k] collects the tables for metrics[k], keyed by seed.
dfs = [dfs_acc, dfs_bl_acc, dfs_f1, dfs_f1_macro, dfs_acc_pruned, dfs_remove_pct]
# Row order of every table, as defined by custom_sort.
row_order = sorted(np.unique(df['method']), key=custom_sort)


for metric, dfs_metric in zip(metrics, dfs):
    for seed in np.unique(df['seed']):
        df_metric_seed = df[df['seed'] == seed][[metric] + cols_keep]
        # Build one single-row frame per method (columns = datasets sorted by
        # name, row label = method) and stack them.
        # Group by the plain column name rather than a one-element list: with
        # a list key, pandas >= 2.0 yields 1-tuples as group keys, and the row
        # labels would then no longer match the strings in row_order.
        df_metric_seed = pd.concat([
            group.sort_values(by=['dataset'])
                 .set_index(['dataset'])
                 .drop(['method'], axis=1)
                 .T.set_index([[method]])
            for method, group in df_metric_seed.groupby('method')
        ])
        df_metric_seed = df_metric_seed.reindex(row_order)
        dfs_metric[seed] = df_metric_seed

In [3]:
# Summary table for the 'accuracy' metric: mean±std across the per-seed
# tables currently stored in dfs_acc, scaled by 100.
dfs_score = dfs_acc

# Stack the per-seed tables once; each method label then appears once per seed.
scores_pct = pd.concat(dfs_score.values()) * 100
# .mean(level=0) / .std(level=0) were removed in pandas 2.0. Grouping on index
# level 0 is the replacement; sort=False keeps the rows in their existing
# order instead of sorting the method names alphabetically.
dfs_score_mean = scores_pct.groupby(level=0, sort=False).mean()
dfs_score_std = scores_pct.groupby(level=0, sort=False).std()
table = dfs_score_mean.round(1).astype(str) + '±' + dfs_score_std.round(1).astype(str)
table

dataset,Cardiotocography,CreditFraud,HAR,Letter,Mushroom,SatIm,SenDrive
no_clean,80.1±0.0,80.1±0.0,80.2±0.0,82.7±0.0,80.0±0.0,80.4±0.0,80.2±0.0
cl,92.9±0.7,88.9±1.3,85.3±0.4,90.7±0.3,89.2±0.7,92.0±0.2,89.8±0.4
kmeans,98.4±0.3,94.7±0.5,95.3±0.3,98.4±0.1,98.9±0.2,95.7±0.3,98.5±0.1


In [4]:
# Summary table for the 'balanced_accuracy' metric: mean±std across the
# per-seed tables currently stored in dfs_bl_acc, scaled by 100.
dfs_score = dfs_bl_acc

# Stack the per-seed tables once; each method label then appears once per seed.
scores_pct = pd.concat(dfs_score.values()) * 100
# .mean(level=0) / .std(level=0) were removed in pandas 2.0. Grouping on index
# level 0 is the replacement; sort=False keeps the rows in their existing
# order instead of sorting the method names alphabetically.
dfs_score_mean = scores_pct.groupby(level=0, sort=False).mean()
dfs_score_std = scores_pct.groupby(level=0, sort=False).std()
table = dfs_score_mean.round(1).astype(str) + '±' + dfs_score_std.round(1).astype(str)
table

dataset,Cardiotocography,CreditFraud,HAR,Letter,Mushroom,SatIm,SenDrive
no_clean,50.0±0.0,50.0±0.0,50.0±0.0,50.0±0.0,50.0±0.0,50.0±0.0,50.0±0.0
cl,88.4±2.7,82.6±1.4,88.3±0.3,92.1±0.3,85.2±1.1,92.0±0.6,92.4±0.4
kmeans,97.3±0.7,92.7±1.1,89.6±0.9,96.0±0.3,97.7±0.2,93.4±0.6,96.2±0.3


In [5]:
# Load the results from 'evaluation_20.csv' and reshape them into one
# (method x dataset) table per metric and per seed.
# NOTE: this rebinds df, cols_keep, metrics, dfs, row_order and the dfs_*
# dicts that the cell loading 'mask_results_20.csv' also assigns, so summary
# cells run after this one read the evaluation results.
df = pd.read_csv('evaluation_20.csv')
cols_keep = ['dataset', 'method']
metrics = ['accuracy', 'balanced_accuracy', 'f1_macro', 'roc_auc_ovr']
dfs_acc, dfs_bl_acc, dfs_f1_macro, dfs_roc_auc_ovr = {}, {}, {}, {}
# dfs[k] collects the tables for metrics[k], keyed by seed.
dfs = [dfs_acc, dfs_bl_acc, dfs_f1_macro, dfs_roc_auc_ovr]
# Row order of every table, as defined by custom_sort.
row_order = sorted(np.unique(df['method']), key=custom_sort)


for metric, dfs_metric in zip(metrics, dfs):
    for seed in np.unique(df['seed']):
        df_metric_seed = df[df['seed'] == seed][[metric] + cols_keep]
        # Build one single-row frame per method (columns = datasets sorted by
        # name, row label = method) and stack them.
        # Group by the plain column name rather than a one-element list: with
        # a list key, pandas >= 2.0 yields 1-tuples as group keys, and the row
        # labels would then no longer match the strings in row_order.
        df_metric_seed = pd.concat([
            group.sort_values(by=['dataset'])
                 .set_index(['dataset'])
                 .drop(['method'], axis=1)
                 .T.set_index([[method]])
            for method, group in df_metric_seed.groupby('method')
        ])
        df_metric_seed = df_metric_seed.reindex(row_order)
        dfs_metric[seed] = df_metric_seed

In [6]:
# Summary table for the 'accuracy' metric: mean±std across the per-seed
# tables currently stored in dfs_acc, scaled by 100.
dfs_score = dfs_acc

# Stack the per-seed tables once; each method label then appears once per seed.
scores_pct = pd.concat(dfs_score.values()) * 100
# .mean(level=0) / .std(level=0) were removed in pandas 2.0. Grouping on index
# level 0 is the replacement; sort=False keeps the rows in their existing
# order instead of sorting the method names alphabetically.
dfs_score_mean = scores_pct.groupby(level=0, sort=False).mean()
dfs_score_std = scores_pct.groupby(level=0, sort=False).std()
table = dfs_score_mean.round(1).astype(str) + '±' + dfs_score_std.round(1).astype(str)
table

dataset,Cardiotocography,CreditFraud,HAR,Letter,Mushroom,SatIm,SenDrive
no_clean,87.9±3.2,90.9±2.4,70.6±0.9,82.5±0.9,86.1±1.3,87.3±0.7,81.8±9.3
cl,89.9±3.4,91.2±1.2,87.5±0.7,93.2±0.5,95.5±0.7,87.8±0.4,98.3±0.2
kmeans,97.1±1.1,95.0±0.8,86.8±0.6,94.4±0.6,99.2±0.2,88.5±0.4,98.1±0.2


In [7]:
# Summary table for the 'balanced_accuracy' metric: mean±std across the
# per-seed tables currently stored in dfs_bl_acc, scaled by 100.
dfs_score = dfs_bl_acc

# Stack the per-seed tables once; each method label then appears once per seed.
scores_pct = pd.concat(dfs_score.values()) * 100
# .mean(level=0) / .std(level=0) were removed in pandas 2.0. Grouping on index
# level 0 is the replacement; sort=False keeps the rows in their existing
# order instead of sorting the method names alphabetically.
dfs_score_mean = scores_pct.groupby(level=0, sort=False).mean()
dfs_score_std = scores_pct.groupby(level=0, sort=False).std()
table = dfs_score_mean.round(1).astype(str) + '±' + dfs_score_std.round(1).astype(str)
table

dataset,Cardiotocography,CreditFraud,HAR,Letter,Mushroom,SatIm,SenDrive
no_clean,92.9±2.0,90.8±2.4,70.1±0.9,82.4±0.9,86.1±1.3,86.7±0.6,81.8±9.3
cl,94.3±1.5,91.1±1.2,87.3±0.7,93.1±0.5,95.5±0.7,87.7±0.4,98.3±0.2
kmeans,96.9±1.2,93.7±1.1,86.4±0.6,94.4±0.6,99.2±0.2,88.0±0.6,98.1±0.2


In [8]:
# Summary table for the 'f1_macro' metric: mean±std across the per-seed
# tables currently stored in dfs_f1_macro, scaled by 100.
dfs_score = dfs_f1_macro

# Stack the per-seed tables once; each method label then appears once per seed.
scores_pct = pd.concat(dfs_score.values()) * 100
# .mean(level=0) / .std(level=0) were removed in pandas 2.0. Grouping on index
# level 0 is the replacement; sort=False keeps the rows in their existing
# order instead of sorting the method names alphabetically.
dfs_score_mean = scores_pct.groupby(level=0, sort=False).mean()
dfs_score_std = scores_pct.groupby(level=0, sort=False).std()
table = dfs_score_mean.round(1).astype(str) + '±' + dfs_score_std.round(1).astype(str)
table

dataset,Cardiotocography,CreditFraud,HAR,Letter,Mushroom,SatIm,SenDrive
no_clean,81.8±3.9,90.0±2.6,70.2±0.9,82.4±0.9,86.1±1.4,86.0±0.7,81.4±10.6
cl,84.8±4.9,90.3±1.2,87.3±0.7,93.1±0.5,95.5±0.7,86.9±0.4,98.3±0.2
kmeans,95.3±1.8,94.2±1.0,86.5±0.6,94.4±0.6,99.2±0.2,87.4±0.5,98.1±0.2


In [9]:
# Summary table for the 'roc_auc_ovr' metric: mean±std across the per-seed
# tables currently stored in dfs_roc_auc_ovr, scaled by 100.
dfs_score = dfs_roc_auc_ovr

# Stack the per-seed tables once; each method label then appears once per seed.
scores_pct = pd.concat(dfs_score.values()) * 100
# .mean(level=0) / .std(level=0) were removed in pandas 2.0. Grouping on index
# level 0 is the replacement; sort=False keeps the rows in their existing
# order instead of sorting the method names alphabetically.
dfs_score_mean = scores_pct.groupby(level=0, sort=False).mean()
dfs_score_std = scores_pct.groupby(level=0, sort=False).std()
table = dfs_score_mean.round(1).astype(str) + '±' + dfs_score_std.round(1).astype(str)
table

dataset,Cardiotocography,CreditFraud,HAR,Letter,Mushroom,SatIm,SenDrive
no_clean,98.6±0.7,90.8±2.4,91.3±0.6,99.0±0.1,86.1±1.3,97.6±0.1,98.0±1.6
cl,99.0±0.5,91.1±1.2,98.4±0.1,99.8±0.0,95.5±0.7,98.2±0.1,99.9±0.0
kmeans,99.3±0.5,93.7±1.1,98.4±0.2,99.9±0.0,99.2±0.2,98.4±0.1,100.0±0.0
