## Analysis of OpenML Experiments

In [1]:
import pandas as pd
import altair as alt

In [2]:
# Load the benchmark scores: a wide table with one row per task and one
# score column per AutoML system.
performances = pd.read_csv('resource/openml_results.csv')
performances.head(n=5)

Unnamed: 0,Dataset,Type,Auto-Sklearn,AutoGluon,AutoWEKA,H2O,TPOT,AlphaD3M
0,task_10101,binary,0.761,0.782,0.755,0.747,0.779,0.79
1,task_12,multiclass,0.976,0.981,0.977,0.98,0.97,0.976
2,task_146195,multiclass,0.838,0.869,0.704,0.865,0.86,0.777
3,task_146212,multiclass,1.0,1.0,0.999,1.0,1.0,1.0
4,task_146606,binary,0.728,0.733,0.607,0.721,0.723,0.717


In [3]:
# Reshape from wide (one column per AutoML system) to long format, the layout
# the Altair encodings below rely on: one row per (Dataset, AutoML) score.
performances = performances.melt(
    id_vars=['Dataset', 'Type'],
    var_name='AutoML',
    value_name='Performance',
)
performances.head(n=5)

Unnamed: 0,Dataset,Type,AutoML,Performance
0,task_10101,binary,Auto-Sklearn,0.761
1,task_12,multiclass,Auto-Sklearn,0.976
2,task_146195,multiclass,Auto-Sklearn,0.838
3,task_146212,multiclass,Auto-Sklearn,1.0
4,task_146606,binary,Auto-Sklearn,0.728


In [4]:
def plot_performances(source):
    """Build a dot plot of per-dataset accuracy, colored by AutoML system.

    Parameters
    ----------
    source
        Long-format data (e.g. a pandas DataFrame) providing the fields
        ``Dataset``, ``Type``, ``AutoML`` and ``Performance`` that the
        encodings below reference.

    Returns
    -------
    An Altair chart with accuracy on the x axis, one y step per dataset,
    one color per AutoML system, and one row facet per value of ``Type``.
    """
    # Fixed system -> color mapping so every chart built by this function
    # uses the same colors and legend order.
    palette = {
        'AutoGluon': '#f7b97c',
        'AutoWEKA': '#f58517',
        'Auto-Sklearn': '#e7ba52',
        'H2O': '#e45857',
        'TPOT': '#d67196',
        'AlphaD3M': '#396cb0',
    }
    domain = list(palette)
    color_range = list(palette.values())

    points = alt.Chart(source, title="").mark_point(filled=True, size=32)

    chart = points.encode(
        alt.X(
            'Performance:Q',
            title="Accuracy",
            # Do not force the axis to start at 0; scores are compared on
            # the range they actually span.
            scale=alt.Scale(zero=False),
            axis=alt.Axis(grid=False)
        ),
        alt.Y(
            'Dataset:N',
            title="",
            # Order datasets by their x (accuracy) value, descending.
            sort='-x',
            axis=alt.Axis(grid=True)
        ),
        color=alt.Color(
            'AutoML:N',
            scale=alt.Scale(domain=domain, range=color_range),
            legend=alt.Legend(title="AutoML")
        ),
        row=alt.Row(
            'Type:N',
            title="",
            # Sort facets on a field that exists in this data. The previous
            # sort field, 'yield', is not one of the columns, so it could not
            # order the rows meaningfully.
            sort=alt.EncodingSortField(field='Performance', op='mean', order='descending'),
        )
    )

    # Step(12) allots 12 pixels per dataset, so the chart height scales with
    # the number of datasets shown.
    return chart.properties(
        height=alt.Step(12),
        width=250
    )
# .configure_view(stroke="transparent")

In [5]:
# Keep only the binary-classification tasks and plot them.
is_binary = performances['Type'].eq('binary')
df_binary = performances.loc[is_binary]
chart1 = plot_performances(df_binary)

In [6]:
# Keep only the multiclass tasks and plot them.
is_multiclass = performances['Type'].eq('multiclass')
df_multiclass = performances.loc[is_multiclass]
chart2 = plot_performances(df_multiclass)

In [7]:
# Place the two charts side by side; view configuration is applied once, on
# the combined chart, to drop the border drawn around each view.
(chart1 | chart2).configure_view(stroke='transparent')