In [1]:
import numpy as np
import pandas as pd

from os import listdir
from os.path import isfile, join

import matplotlib.pyplot as plt
import altair as alt

In [2]:
# Directory holding one results CSV per dataset.
dir_path = 'results'

# List the directory once and derive both lists from the same sorted sequence.
# Sorting paths and stems independently can pair a dataset name with the wrong
# file: 'a-b.csv' sorts before 'a.b.csv' as a path, but its stem 'a-b' sorts
# after the stem 'a'.
file_names = sorted(f for f in listdir(dir_path) if isfile(join(dir_path, f)))

results_files = [join(dir_path, f) for f in file_names]
# Dataset name = the part of the file name before the first dot.
datasets_names = [f.split('.')[0] for f in file_names]

results_files, datasets_names

(['results/balance_scale.csv',
  'results/ecoli.csv',
  'results/glass.csv',
  'results/iris.csv',
  'results/yeast.csv'],
 ['balance_scale', 'ecoli', 'glass', 'iris', 'yeast'])

In [3]:
# Read every results CSV and key the resulting DataFrame by its dataset name.
# `datasets_names` and `results_files` are paired positionally.
dataframes_dict = dict(zip(datasets_names, map(pd.read_csv, results_files)))

dataframes_dict

{'balance_scale':     train_size        model  accuracy_1  accuracy_2  accuracy_3
 0          1.0     Baseline       0.912       0.912       0.912
 1          1.0       Vapnik       0.912       0.912       0.912
 2          1.0  Projections       0.912       0.912       0.920
 3          1.0  Hyperplanes       0.880       0.920       0.880
 4          0.5     Baseline       0.896       0.896       0.896
 5          0.5       Vapnik       0.896       0.896       0.896
 6          0.5  Projections       0.840       0.896       0.896
 7          0.5  Hyperplanes       0.880       0.896       0.864
 8          0.1     Baseline       0.864       0.864       0.864
 9          0.1       Vapnik       0.848       0.848       0.848
 10         0.1  Projections       0.856       0.864       0.784
 11         0.1  Hyperplanes       0.768       0.800       0.528
 12         1.0     Baseline       0.912       0.912       0.912
 13         1.0       Vapnik       0.912       0.912       0.912
 14     

In [4]:
def _percent_by_model(row_stat):
    """Regroup a flat per-row statistic and scale it to percent.

    The values are taken in consecutive groups of 4 and transposed, so entries
    that share the same position inside their group become adjacent. In the
    displayed data each group of 4 rows holds the four models at one train
    size, which makes the result ordered model-major (presumably
    Baseline@1.0, Baseline@0.5, Baseline@0.1, Vapnik@1.0, ... -- TODO confirm
    against the experiment script).
    """
    return row_stat.reshape(-1, 4).T.reshape(-1,) * 100


# Split the rows of each results table into 3 equal chunks and place their last
# three (accuracy) columns side by side, so each row gathers every accuracy
# measured for one (train_size, model) pair.
# NOTE(review): assumes the CSV is 3 stacked repetitions of the same row
# layout -- confirm.
accuracies_stacked = {
    dataset: np.hstack(np.array_split(dataframe.iloc[:, -3:].values, 3))
    for dataset, dataframe in dataframes_dict.items()
}

mean_acc = {
    dataset: _percent_by_model(np.mean(accuracies, axis=1))
    for dataset, accuracies in accuracies_stacked.items()
}

# np.std with its default ddof=0, i.e. the population standard deviation.
std_acc = {
    dataset: _percent_by_model(np.std(accuracies, axis=1))
    for dataset, accuracies in accuracies_stacked.items()
}

output = []

for dataset in mean_acc.keys():
    # Row labels are printed separately; the LaTeX table rows are only numbered.
    print(dataset, end='\t')

    acc = mean_acc[dataset]
    std = std_acc[dataset]
    # Raw f-string: "\pm" must reach LaTeX verbatim, and "\p" is not a valid
    # Python escape sequence in a normal string literal.
    output.append([rf"${mean:.2f} \pm {spread:.2f}$" for mean, spread in zip(acc, std)])

out_df = pd.DataFrame(output)

print(out_df.style.to_latex())

balance_scale	ecoli	glass	iris	yeast	\begin{tabular}{lllllllllllll}
 & 0 & 1 & 2 & 3 & 4 & 5 & 6 & 7 & 8 & 9 & 10 & 11 \\
0 & $91.47 \pm 0.38$ & $90.93 \pm 1.00$ & $88.80 \pm 1.73$ & $91.47 \pm 0.38$ & $90.93 \pm 1.00$ & $87.73 \pm 2.10$ & $91.73 \pm 0.38$ & $90.04 \pm 2.33$ & $83.73 \pm 7.17$ & $90.13 \pm 1.51$ & $84.36 \pm 9.57$ & $76.00 \pm 12.77$ \\
1 & $85.29 \pm 2.08$ & $83.33 \pm 1.83$ & $71.57 \pm 1.39$ & $85.29 \pm 1.20$ & $83.82 \pm 2.08$ & $72.55 \pm 4.22$ & $84.15 \pm 2.05$ & $83.01 \pm 1.85$ & $67.32 \pm 4.03$ & $75.98 \pm 6.28$ & $77.78 \pm 3.50$ & $63.89 \pm 9.61$ \\
2 & $72.87 \pm 7.91$ & $65.89 \pm 2.90$ & $56.59 \pm 4.78$ & $72.87 \pm 7.91$ & $65.89 \pm 2.90$ & $45.74 \pm 6.67$ & $72.09 \pm 7.44$ & $67.18 \pm 3.87$ & $52.45 \pm 7.03$ & $71.06 \pm 7.60$ & $60.47 \pm 6.58$ & $45.48 \pm 9.50$ \\
3 & $96.67 \pm 0.00$ & $93.33 \pm 2.72$ & $93.33 \pm 2.72$ & $95.56 \pm 1.57$ & $92.22 \pm 4.16$ & $90.00 \pm 4.71$ & $95.19 \pm 1.66$ & $95.19 \pm 2.28$ & $77.78 \pm 22.93$ & $8

In [5]:
# For each dataset, split the rows into 9 equal chunks and lay their accuracy
# columns side by side; in the displayed data every chunk is the four models at
# one train size, so each resulting row collects all runs of one model.
per_dataset_accuracies = [
    np.hstack(np.array_split(dataframe.iloc[:, -3:].values, 9))
    for dataframe in dataframes_dict.values()
]

# One row per model, one column per individual experiment across all datasets.
accuracies_grouped = np.hstack(per_dataset_accuracies)

# NOTE(review): row i of `accuracies_grouped` is assumed to belong to
# invariants[i], i.e. the CSV lists the models in this same order -- confirm.
invariants = ['Baseline', 'Vapnik', 'Random projections', 'Random hyperplanes']
output_dict = {'Invariant 1': [], 'Invariant 2': [], 'wins': [], 'draws': [], 'loses': []}

# Compare every unordered pair once; j always starts past i, so no extra
# i != j guard is needed.
for i, inv_base in enumerate(invariants):
    acc_base = accuracies_grouped[i]

    for j in range(i + 1, len(invariants)):
        acc_comp = accuracies_grouped[j]

        output_dict['Invariant 1'].append(inv_base)
        output_dict['Invariant 2'].append(invariants[j])
        # Counts are from the point of view of "Invariant 1".
        output_dict['wins'].append(np.sum(acc_base > acc_comp))
        output_dict['draws'].append(np.sum(acc_base == acc_comp))
        output_dict['loses'].append(np.sum(acc_base < acc_comp))

pd.DataFrame(output_dict)
            
            

Unnamed: 0,Invariant 1,Invariant 2,wins,draws,loses
0,Baseline,Vapnik,45,66,24
1,Baseline,Random projections,57,48,30
2,Baseline,Random hyperplanes,104,17,14
3,Vapnik,Random projections,48,50,37
4,Vapnik,Random hyperplanes,101,14,20
5,Random projections,Random hyperplanes,88,21,26


In [6]:
# Same regrouping as for the win/draw/loss table: one row per model, one column
# per individual experiment across all datasets.
per_dataset_accuracies = [
    np.hstack(np.array_split(dataframe.iloc[:, -3:].values, 9))
    for dataframe in dataframes_dict.values()
]
accuracies_grouped = np.hstack(per_dataset_accuracies)

invariants = ['Baseline', 'Vapnik', 'Random projections', 'Random hyperplanes']

n_experiments = accuracies_grouped.shape[1]

# Train size attached to each experiment column: three columns per train size,
# cycling 1.0 -> 0.5 -> 0.1.
# NOTE(review): this mirrors the column order produced by the 9-way split
# above -- confirm if the CSV layout changes.
train_size_pattern = [1.0] * 3 + [0.5] * 3 + [0.1] * 3
# Repeat the pattern as often as the data requires (ceil division) instead of a
# fixed 15, so adding a dataset or a repetition does not raise IndexError.
n_repeats = -(-n_experiments // len(train_size_pattern))
train_sizes = train_size_pattern * n_repeats

out_tables = []

for i, inv_base in enumerate(invariants):
    acc_base = accuracies_grouped[i]

    for j in range(i + 1, len(invariants)):
        inv_comp = invariants[j]
        acc_comp = accuracies_grouped[j]

        # Display order of the outcomes: first invariant, second invariant, ties.
        sort_dict = {inv_base: 0, inv_comp: 1, 'Tie': 2}

        results_list = []
        for k in range(n_experiments):
            if acc_base[k] > acc_comp[k]:
                result = inv_base
            elif acc_base[k] < acc_comp[k]:
                result = inv_comp
            else:
                result = 'Tie'

            results_list.append((result, train_sizes[k]))

        # Group by outcome, then by ascending train size inside each outcome.
        results_list = sorted(results_list, key=lambda x: (sort_dict[x[0]], x[1]))

        out_df = pd.DataFrame({
            'invariant_1': [inv_base] * n_experiments,
            'invariant_2': [inv_comp] * n_experiments,
            'result': [outcome for outcome, _ in results_list],
            # ids number the rows after sorting; they are positions, not
            # identifiers of the original experiments.
            'id': np.arange(1, n_experiments + 1),
            'train_size': [train_size for _, train_size in results_list],
        })

        out_tables.append(out_df)

out_tables

[    invariant_1 invariant_2    result   id  train_size
 0      Baseline      Vapnik  Baseline    1         0.1
 1      Baseline      Vapnik  Baseline    2         0.1
 2      Baseline      Vapnik  Baseline    3         0.1
 3      Baseline      Vapnik  Baseline    4         0.1
 4      Baseline      Vapnik  Baseline    5         0.1
 ..          ...         ...       ...  ...         ...
 130    Baseline      Vapnik       Tie  131         1.0
 131    Baseline      Vapnik       Tie  132         1.0
 132    Baseline      Vapnik       Tie  133         1.0
 133    Baseline      Vapnik       Tie  134         1.0
 134    Baseline      Vapnik       Tie  135         1.0
 
 [135 rows x 5 columns],
     invariant_1         invariant_2    result   id  train_size
 0      Baseline  Random projections  Baseline    1         0.1
 1      Baseline  Random projections  Baseline    2         0.1
 2      Baseline  Random projections  Baseline    3         0.1
 3      Baseline  Random projections  Baselin

In [7]:
# Restrict every pairwise table to the experiments run with train_size == 0.1
# and renumber the remaining rows from 1.
out_tables_small = []

for table in out_tables:
    small_table = table[table['train_size'] == 0.1]
    # assign() returns a new frame, so the ids are never written into a
    # filtered slice (which raises SettingWithCopyWarning) and the tables in
    # `out_tables` stay untouched. The original row index is kept.
    small_table = small_table.assign(id=np.arange(1, small_table.shape[0] + 1))
    out_tables_small.append(small_table)

out_tables_small

[   invariant_1 invariant_2    result  id  train_size
 0     Baseline      Vapnik  Baseline   1         0.1
 1     Baseline      Vapnik  Baseline   2         0.1
 2     Baseline      Vapnik  Baseline   3         0.1
 3     Baseline      Vapnik  Baseline   4         0.1
 4     Baseline      Vapnik  Baseline   5         0.1
 5     Baseline      Vapnik  Baseline   6         0.1
 6     Baseline      Vapnik  Baseline   7         0.1
 7     Baseline      Vapnik  Baseline   8         0.1
 8     Baseline      Vapnik  Baseline   9         0.1
 9     Baseline      Vapnik  Baseline  10         0.1
 10    Baseline      Vapnik  Baseline  11         0.1
 11    Baseline      Vapnik  Baseline  12         0.1
 12    Baseline      Vapnik  Baseline  13         0.1
 13    Baseline      Vapnik  Baseline  14         0.1
 14    Baseline      Vapnik  Baseline  15         0.1
 15    Baseline      Vapnik  Baseline  16         0.1
 16    Baseline      Vapnik  Baseline  17         0.1
 17    Baseline      Vapnik 

In [8]:
colors_dict = {
    'Baseline': '#8dd3c7',
    'Vapnik': '#ffffb3',
    'Random projections': '#bebada',
    'Random hyperplanes': '#fb8072',
    'Tie': '#858796',
}

# Vega expression that relabels the outcome so the categories sort as
# first invariant < second invariant < ties: zero-width spaces (U+200B) are
# prepended as invisible sort keys.
RESULT_LABEL_EXPR = (
    "datum.result == datum.invariant_1 ? datum.result : "
    "(datum.result == datum.invariant_2 ? '\u200b' + datum.result : '\u200b\u200bTies')"
)


def _result_grid(table, title, n_cols, color_scale, tooltip, width, height, legend=None):
    """Draw one square per row of ``table``, coloured by the comparison outcome.

    Squares are placed on a grid that is ``n_cols`` wide using the table's
    ``id`` column. ``legend`` is only forwarded when given, so omitting it
    keeps Altair's default legend instead of disabling the legend.
    """
    color_kwargs = {'scale': color_scale}
    if legend is not None:
        color_kwargs['legend'] = legend

    return alt.Chart(table, title=title).transform_calculate(
        # `row` must be computed before `col`, which reads datum.row.
        row=f"ceil(datum.id/{n_cols})",
        col=f"datum.id - datum.row*{n_cols}",
        result=RESULT_LABEL_EXPR,
    ).mark_point(
        filled=True,
        size=500,
        shape='square',
        opacity=1.0,
    ).encode(
        x=alt.X("col:O", axis=None),
        y=alt.Y("row:O", axis=None),
        color=alt.Color('result:N', **color_kwargs),
        tooltip=alt.Tooltip(tooltip),
        strokeWidth=alt.StrokeWidthValue(0),
    ).properties(
        width=width,
        height=height
    )


charts = []

for out_table, small_table in zip(out_tables, out_tables_small):

    # Every row of a table carries the same pair of invariants.
    inv_1 = out_table.iloc[0, 0]
    inv_2 = out_table.iloc[0, 1]
    color_inv_1 = colors_dict[inv_1]
    color_inv_2 = colors_dict[inv_2]

    # Pin the domain to the three relabelled outcomes. With only a range, the
    # colours are handed out to whichever labels are present, so a table with a
    # missing outcome would shift the remaining labels onto the wrong colours.
    color_scale = alt.Scale(
        domain=[inv_1, '\u200b' + inv_2, '\u200b\u200bTies'],
        range=[color_inv_1, color_inv_2, colors_dict['Tie']],
    )

    chart1 = _result_grid(
        out_table,
        title='Performance considering all experiments',
        n_cols=15,
        color_scale=color_scale,
        tooltip=['result:N', 'train_size:N'],
        width=400,
        height=250,
    )

    chart2 = _result_grid(
        small_table,
        title=['Performance considering experiments', 'with 10% of training data'],
        n_cols=9,
        color_scale=color_scale,
        tooltip=['result:N'],
        width=250,
        height=140,
        legend=alt.Legend(title='Result', orient="right"),
    )

    chart = alt.hconcat(chart1, chart2, spacing=75)

    charts.append(chart)

# First three pairwise comparisons, one per row; each row keeps its own colour
# scale because the invariants (and therefore the colours) differ per row.
alt.vconcat(*charts[:3], spacing=50).resolve_scale(
    color='independent'
).configure_view(
    stroke=None,
)

In [9]:
# Remaining pairwise comparisons (from the fourth chart on), stacked vertically.
remaining_charts = charts[3:]
stacked_charts = alt.vconcat(*remaining_charts, spacing=50)
# Each row compares a different pair of invariants, so colour scales stay per-row.
stacked_charts = stacked_charts.resolve_scale(color='independent')
# Drop the border that is drawn around every view by default.
stacked_charts.configure_view(stroke=None)