

# Analysis of Auto-Sklearn and CLS-Luigi Results for Binary Classification Problems

## Overview <a id="ov"></a>
1. [Dataset Information](#ds_info)<br>
2. [Efficiency](#eff)<br>
3. [Accuracy Scores](#scores)<br>


In [16]:

# Dataset names that receive an "X" in the "In AutoSklearn Metadata?" column
# of the dataset overview table; every other dataset is shown with "-".
ds_in_autosklearn = [
    "spambase",
    "pc4",
    "wilt",
    "qsar-biodeg",
    "mozilla4",
    "steel-plates-fault",
    "ozone-level-8hr",
    "eeg-eye-state",
    "madelon",
]

# Datasets whose results are aggregated in the efficiency and accuracy tables.
# Whether a dataset is part of auto-sklearn's metadata is decided by
# ds_in_autosklearn, not by this list.
# NOTE(review): earlier inline remarks here flagged kc1 as present in
# auto-sklearn's metadata and left madelon unflagged, which disagrees with
# ds_in_autosklearn (madelon listed, kc1 not) -- confirm which one is right.
dataset_names = [
    "spambase",
    "sylvine",
    "bank-marketing",
    "phoneme",
    "kc1",
    "pc4",
    "wilt",
    "qsar-biodeg",
    "mozilla4",
    "steel-plates-fault",
    "ozone-level-8hr",
    "eeg-eye-state",
    "madelon",
    "numerai28.6",
    "higgs",
]
    

# OpenML task ids (passed to openml.tasks.get_task) of the benchmark datasets.
# The order is independent of dataset_names.
ds_ids = [
    9967,    # steel-plates-fault
    9957,    # qsar-biodeg
    9952,    # phoneme
    9978,    # ozone-level-8hr
    146820,  # wilt
    3899,    # mozilla4
    9983,    # eeg-eye-state
    359962,  # kc1
    359958,  # pc4
    361066,  # bank-marketing
    359972,  # sylvine
    9976,    # madelon
    167120,  # numerai28.6
    146606,  # higgs
    43,      # spambase
]


# Pre-processing components grouped by pipeline step. The keys double as the
# column names looked up in the run-history CSV files, and the component names
# are matched against the names of the per-component run-time JSON files.
components = {
    "feature_preprocessor": [
        "SKLFastICA", "SKLFeatureAgglomeration", "SKLKernelPCA",
        "SKLNystroem", "SKLPCA", "SKLPolynomialFeatures",
        "SKLRandomTreesEmbedding", "SKLRBFSampler", "SKLSelectFromExtraTrees",
        "SKLSelectFromLinearSVC", "SKLSelectPercentile", "SKLSelectRates",
    ],
    "scaler": [
        "SKLMinMaxScaler", "SKLNormalizer", "SKLPowerTransformer",
        "SKLQuantileTransformer", "SKLRobustScaler", "SKLStandardScaler",
    ],
    "imputer": ["SKLSimpleImpute"],
}

# Abbreviations for the CLS-Luigi classifier component names.
clf_short_names = {
    # tree-based and neighbour-based models
    "SKLRandomForest": "RF",
    "SKLExtraTrees": "EXT",
    "SKLDecisionTree": "DT",
    "SKLKNearestNeighbors": "KNN",
    # boosting and stochastic gradient descent
    "SKLGradientBoosting": "GB",
    "SKLSGD": "SGD",
    "SKLAdaBoost": "ADA",
    # support vector classifiers
    "SKLLinearSVC": "LSVC",
    "SKLKernelSVC": "KSVC",
    # discriminant analysis
    "SKLLinearDiscriminantAnalysis": "LDA",
    "SKLQuadraticDiscriminantAnalysis": "QDA",
    "SKLPassiveAggressive": "PA",
    # naive Bayes variants and the multi-layer perceptron
    "SKLGaussianNaiveBayes": "GNB",
    "SKLMultinomialNB": "MNB",
    "SKLBernoulliNB": "BNB",
    "SKLMultiLayerPerceptron": "MLP",
}



# Abbreviations for the auto-sklearn classifier identifiers; the short names
# are the same ones used in clf_short_names.
askl_clf_short_names = {
    "random_forest": "RF",
    "extra_trees": "EXT",
    "decision_tree": "DT",
    "k_nearest_neighbors": "KNN",
    "gradient_boosting": "GB",
    "sgd": "SGD",
    "adaboost": "ADA",
    "liblinear_svc": "LSVC",
    "libsvm_svc": "KSVC",
    "lda": "LDA",
    "qda": "QDA",
    "passive_aggressive": "PA",
    "gaussian_nb": "GNB",
    "multinomial_nb": "MNB",
    "bernoulli_nb": "BNB",
    "mlp": "MLP",
}


In [17]:
import pandas as pd
from openml import tasks
import warnings
import json 
from os.path import join as pjoin
from os import listdir
from statistics import median, geometric_mean
import pickle
import autosklearn.classification
import seaborn as sns


# Silence every warning for the rest of the notebook so the rendered tables
# stay readable. NOTE(review): this also hides deprecation notices.
warnings.filterwarnings(action="ignore")

### 1. Dataset Information <a id="ds_info"></a>
[back to overview](#ov)

In [18]:


def get_ds_info(ds_id):
    """Fetch basic facts about the dataset behind an OpenML task.

    Args:
        ds_id: Identifier handed to ``openml.tasks.get_task``.

    Returns:
        Tuple ``(ds_name, n_features, n_instances, url)`` where the counts are
        the number of columns and rows of the task's feature frame and ``url``
        is the task's ``openml_url``.
    """
    task = tasks.get_task(ds_id)

    # Only the feature frame matters here; the target is discarded.
    features, _ = task.get_X_and_y(dataset_format='dataframe')
    n_instances, n_features = features.shape

    return task.get_dataset().name, n_features, n_instances, task.openml_url


def make_datasets_df(ds_ids, in_autosklearn_list):
    """Build the dataset overview table used for the LaTeX export.

    Args:
        ds_ids: Iterable of ids accepted by ``get_ds_info``.
        in_autosklearn_list: Dataset names that should be marked with "X" in
            the "In AutoSklearn Metadata?" column; all others get "-".

    Returns:
        A DataFrame with one row per id and the columns ``Name``,
        ``OpenML-ID``, ``#Instances``, ``#Features`` and
        ``In AutoSklearn Metadata?``. ``OpenML-ID`` holds a LaTeX ``\\href``
        command linking the id to its OpenML URL.
    """
    table = {
        'Name': [],
        'OpenML-ID': [],
        '#Instances': [],
        '#Features': [],
        'In AutoSklearn Metadata?': [],
    }

    for ds_id in ds_ids:
        ds_name, n_features, n_instances, url = get_ds_info(ds_id)
        table['Name'].append(ds_name)
        # The backslash is escaped explicitly: "\h" is not a valid escape
        # sequence and triggers a SyntaxWarning on recent Python versions.
        table['OpenML-ID'].append(f"\\href{{{url}}}{{{ds_id}}}")
        table['#Instances'].append(n_instances)
        table['#Features'].append(n_features)
        table['In AutoSklearn Metadata?'].append(
            "X" if ds_name in in_autosklearn_list else "-"
        )

    return pd.DataFrame(table)


# Overview table of all benchmark datasets (one get_ds_info lookup per id).
dataset_info = make_datasets_df(ds_ids=ds_ids, in_autosklearn_list=ds_in_autosklearn)

In [19]:
# Largest datasets first; the sort is applied to dataset_info itself.
dataset_info.sort_values(by='#Instances', ascending=False, inplace=True)

dataset_info

Unnamed: 0,Name,OpenML-ID,#Instances,#Features,In AutoSklearn Metadata?
13,higgs,\href{https://www.openml.org/t/146606}{146606},98050,28,-
12,numerai28.6,\href{https://www.openml.org/t/167120}{167120},96320,21,-
5,mozilla4,\href{https://www.openml.org/t/3899}{3899},15545,5,X
6,eeg-eye-state,\href{https://www.openml.org/t/9983}{9983},14980,14,X
9,bank-marketing,\href{https://www.openml.org/t/361066}{361066},10578,7,-
2,phoneme,\href{https://www.openml.org/t/9952}{9952},5404,5,-
10,sylvine,\href{https://www.openml.org/t/359972}{359972},5124,20,-
4,wilt,\href{https://www.openml.org/t/146820}{146820},4839,5,X
14,spambase,\href{https://www.openml.org/t/43}{43},4601,57,X
11,madelon,\href{https://www.openml.org/t/9976}{9976},2600,500,X


In [20]:
def format_lattex_backslash(s):
    """Turn every literal ``\\textbackslash`` in ``s`` back into a backslash."""
    pieces = s.split('\\textbackslash')
    return '\\'.join(pieces)



# escape=False leaves the \href commands in the OpenML-ID column untouched.
dataset_info_latex = dataset_info.to_latex(
    index=False,
    escape=False,
    formatters={"OpenML-ID": format_lattex_backslash},
)
print(dataset_info_latex)

\begin{tabular}{llrrl}
\toprule
Name & OpenML-ID & #Instances & #Features & In AutoSklearn Metadata? \\
\midrule
higgs & \href{https://www.openml.org/t/146606}{146606} & 98050 & 28 & - \\
numerai28.6 & \href{https://www.openml.org/t/167120}{167120} & 96320 & 21 & - \\
mozilla4 & \href{https://www.openml.org/t/3899}{3899} & 15545 & 5 & X \\
eeg-eye-state & \href{https://www.openml.org/t/9983}{9983} & 14980 & 14 & X \\
bank-marketing & \href{https://www.openml.org/t/361066}{361066} & 10578 & 7 & - \\
phoneme & \href{https://www.openml.org/t/9952}{9952} & 5404 & 5 & - \\
sylvine & \href{https://www.openml.org/t/359972}{359972} & 5124 & 20 & - \\
wilt & \href{https://www.openml.org/t/146820}{146820} & 4839 & 5 & X \\
spambase & \href{https://www.openml.org/t/43}{43} & 4601 & 57 & X \\
madelon & \href{https://www.openml.org/t/9976}{9976} & 2600 & 500 & X \\
ozone-level-8hr & \href{https://www.openml.org/t/9978}{9978} & 2534 & 72 & X \\
kc1 & \href{https://www.openml.org/t/359962}{359962} & 

In [21]:
# Index the table by dataset name while keeping "Name" as a regular column,
# so later tables can be re-ordered with .loc[dataset_info.index].
dataset_info.set_index('Name', drop=False, inplace=True)
dataset_info

Unnamed: 0_level_0,Name,OpenML-ID,#Instances,#Features,In AutoSklearn Metadata?
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
higgs,higgs,\href{https://www.openml.org/t/146606}{146606},98050,28,-
numerai28.6,numerai28.6,\href{https://www.openml.org/t/167120}{167120},96320,21,-
mozilla4,mozilla4,\href{https://www.openml.org/t/3899}{3899},15545,5,X
eeg-eye-state,eeg-eye-state,\href{https://www.openml.org/t/9983}{9983},14980,14,X
bank-marketing,bank-marketing,\href{https://www.openml.org/t/361066}{361066},10578,7,-
phoneme,phoneme,\href{https://www.openml.org/t/9952}{9952},5404,5,-
sylvine,sylvine,\href{https://www.openml.org/t/359972}{359972},5124,20,-
wilt,wilt,\href{https://www.openml.org/t/146820}{146820},4839,5,X
spambase,spambase,\href{https://www.openml.org/t/43}{43},4601,57,X
madelon,madelon,\href{https://www.openml.org/t/9976}{9976},2600,500,X


In [22]:
# Display the index labels that the result tables below are re-ordered by.
dataset_info.index

Index(['higgs', 'numerai28.6', 'mozilla4', 'eeg-eye-state', 'bank-marketing',
       'phoneme', 'sylvine', 'wilt', 'spambase', 'madelon', 'ozone-level-8hr',
       'kc1', 'steel-plates-fault', 'pc4', 'qsar-biodeg'],
      dtype='object', name='Name')

### 2. Efficiency <a id="eff"></a>
[back to overview](#ov)


In [23]:
def load_json(path):
    """Read the JSON document stored at ``path`` and return the parsed object.

    The file is decoded as UTF-8 explicitly, so the result does not depend on
    the platform's default locale encoding.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    with open(path, 'r', encoding='utf-8') as f:
        return json.load(f)

In [24]:
def get_component_run_time_and_count(component, dataset, func):
    """Aggregate the recorded run times of one component on one dataset.

    Every file in ``binary_classfication_pipelines/results/<dataset>`` whose
    name ends in ``run_time.json`` and contains ``component`` contributes its
    ``"total_seconds"`` value.

    Args:
        component: Component name searched for inside the file names.
        dataset: Name of the dataset's results sub-directory.
        func: ``"mean"``, ``"median"``, ``"geometric_mean"`` or a callable that
            receives the list of run times.

    Returns:
        Tuple ``(aggregated_run_time, number_of_run_time_files)``. The
        aggregated run time is ``0`` when no matching file exists.
    """
    dataset_path = pjoin("binary_classfication_pipelines/results", dataset)

    run_times = [
        load_json(pjoin(dataset_path, file_name))["total_seconds"]
        for file_name in listdir(dataset_path)
        if file_name.endswith("run_time.json") and component in file_name
    ]

    # Nothing recorded: report zero regardless of the requested aggregation.
    if not run_times:
        return 0, len(run_times)

    named_aggregations = {
        "mean": lambda values: sum(values) / len(values),
        "median": median,
        "geometric_mean": geometric_mean,
    }

    if isinstance(func, str) and func in named_aggregations:
        aggregate = named_aggregations[func]
    else:
        # Anything else is treated as a callable applied to the run times.
        aggregate = func

    return aggregate(run_times), len(run_times)
                                  

# Example: mean recorded run time and number of recorded runs of SKLPCA on kc1.
get_component_run_time_and_count(component="SKLPCA", dataset="kc1", func="mean")
                
    

(0.011207887104579381, 7)

In [25]:
def get_component_in_pipeline_count(component, component_type,  dataset):
    """Count the successful pipelines of a dataset that contain a component.

    Args:
        component: Component name to look for.
        component_type: Run-history column that holds this kind of component.
        dataset: Dataset name; selects
            ``run_histories/<dataset>_train_run_history.csv``.

    Returns:
        Number of run-history rows whose ``component_type`` column equals
        ``component`` and whose ``status`` column equals ``"success"``.
    """
    history = pd.read_csv(
        f"binary_classfication_pipelines/run_histories/{dataset}_train_run_history.csv"
    )

    uses_component = history[component_type] == component
    succeeded = history["status"] == "success"

    return len(history[uses_component & succeeded])

# Example: number of successful kc1 pipelines that use SKLMinMaxScaler.
get_component_in_pipeline_count(
    component="SKLMinMaxScaler", component_type="scaler", dataset="kc1"
)

169

In [26]:
def compute_estimated_saving_per_component(component, component_type, dataset, func="mean"):
    """Estimate the time and executions saved for one component.

    The aggregated run time of the component is multiplied once by the number
    of successful pipelines that contain it and once by the number of recorded
    runs; the difference of the two products is the estimated saving.

    Args:
        component: Component name.
        component_type: Run-history column holding this kind of component.
        dataset: Dataset name.
        func: Aggregation forwarded to ``get_component_run_time_and_count``.

    Returns:
        Tuple ``(estimated_saved_time, avoided_runs)``.
    """
    typical_run_time, executed_runs = get_component_run_time_and_count(component, dataset, func)
    pipeline_uses = get_component_in_pipeline_count(component, component_type, dataset)

    time_if_run_per_pipeline = typical_run_time * pipeline_uses
    time_for_recorded_runs = typical_run_time * executed_runs

    return time_if_run_per_pipeline - time_for_recorded_runs, pipeline_uses - executed_runs
    

# Example: estimated saving for SKLMinMaxScaler on kc1 (default "mean" aggregation).
compute_estimated_saving_per_component(
    component="SKLMinMaxScaler", component_type="scaler", dataset="kc1"
)
 

(1.2063159942626953, 168)

In [27]:
def get_savings_per_category(components, dataset, func="mean"):
    """Sum the estimated savings of all components within each category.

    Args:
        components: Mapping from category name to a list of component names.
        dataset: Dataset name.
        func: Aggregation forwarded to
            ``compute_estimated_saving_per_component``.

    Returns:
        Dict keyed by category; each value is a dict with the keys
        ``"total_saved_time"`` and ``"total_avoided_runs"``.
    """
    savings_by_category = {}

    for category, component_names in components.items():
        per_component = [
            compute_estimated_saving_per_component(name, category, dataset, func)
            for name in component_names
        ]
        savings_by_category[category] = {
            "total_saved_time": sum(saved_time for saved_time, _ in per_component),
            "total_avoided_runs": sum(avoided for _, avoided in per_component),
        }

    return savings_by_category
        
# Example: per-category savings on kc1 (default "mean" aggregation).
# A stray bare `components` expression that had no effect was removed here.
get_savings_per_category(components, "kc1")
        
    
    

{'feature_preprocessor': {'total_saved_time': 52.13059401512146,
  'total_avoided_runs': 987},
 'scaler': {'total_saved_time': 17.530094146728516,
  'total_avoided_runs': 1008},
 'imputer': {'total_saved_time': 9.176885604858398,
  'total_avoided_runs': 1182}}

In [28]:
def get_dataset_run_time_and_component_count(dataset):
    """Return the logged training time and pre-processing component count.

    Args:
        dataset: Dataset name; selects ``logs/<dataset>_train_time.json`` and
            ``run_histories/<dataset>_train_run_history.csv``.

    Returns:
        Tuple ``(run_time, component_count)``. ``run_time`` is the
        ``"total_seconds"`` entry of the train-time log. ``component_count``
        adds up, over the successful runs, the non-missing entries of the
        ``imputer``, ``scaler`` and ``feature_preprocessor`` columns.
    """
    train_time_log = pjoin("binary_classfication_pipelines/logs", f"{dataset}_train_time.json")
    run_time = load_json(train_time_log)["total_seconds"]

    history = pd.read_csv(
        f"binary_classfication_pipelines/run_histories/{dataset}_train_run_history.csv"
    )
    succeeded = history[history["status"] == "success"]

    component_count = sum(
        len(succeeded[succeeded[step].notna()])
        for step in ("imputer", "scaler", "feature_preprocessor")
    )

    return run_time, component_count
             
            
# Example: logged training time and component count for kc1.
get_dataset_run_time_and_component_count(dataset="kc1")
    

(136.20363903045654, 3268)

In [29]:
def get_savings_dfs(func="mean", time=True):
    """Tabulate the estimated savings for every dataset in ``dataset_names``.

    Reads the module-level ``dataset_names`` and ``components``.

    Args:
        func: Aggregation forwarded to ``get_savings_per_category``.
        time: When true, return the time-savings table; otherwise return the
            avoided-executions table.

    Returns:
        A DataFrame with one row per dataset. The time table has the columns
        ``Dataset``, ``Total Run Sec``, one ``... Saved Sec`` column per
        category and ``Saved %`` (saved time relative to saved time plus run
        time). The other table has ``Dataset``, ``Total Components``, one
        ``... Avoided Runs`` column per category and ``Avoided %`` (avoided
        runs relative to the component count).
    """
    total_run_time, total_component = [], []
    imputer_saved_time, imputer_avoided_runs = [], []
    scaler_saved_time, scaler_avoided_runs = [], []
    f_preproc_saved_time, f_preproc_avoided_runs = [], []
    saved_time_percent, saved_c_runs_percent = [], []

    for ds in dataset_names:
        run_time, component_count = get_dataset_run_time_and_component_count(ds)
        total_run_time.append(run_time)
        total_component.append(component_count)

        savings = get_savings_per_category(components=components, dataset=ds, func=func)
        imputer = savings["imputer"]
        scaler = savings["scaler"]
        f_preproc = savings["feature_preprocessor"]

        imputer_saved_time.append(imputer["total_saved_time"])
        imputer_avoided_runs.append(imputer["total_avoided_runs"])
        scaler_saved_time.append(scaler["total_saved_time"])
        scaler_avoided_runs.append(scaler["total_avoided_runs"])
        f_preproc_saved_time.append(f_preproc["total_saved_time"])
        f_preproc_avoided_runs.append(f_preproc["total_avoided_runs"])

        # Summation order (imputer, scaler, feature preprocessor) is kept fixed
        # so the floating-point results stay reproducible.
        ds_saved_time = (
            imputer["total_saved_time"]
            + scaler["total_saved_time"]
            + f_preproc["total_saved_time"]
        )
        saved_time_percent.append(ds_saved_time / (ds_saved_time + run_time) * 100)

        ds_avoided_runs = (
            imputer["total_avoided_runs"]
            + scaler["total_avoided_runs"]
            + f_preproc["total_avoided_runs"]
        )
        saved_c_runs_percent.append((ds_avoided_runs / component_count) * 100)

    if time:
        return pd.DataFrame({
            "Dataset": dataset_names,
            "Total Run Sec": total_run_time,
            "Imputer Saved Sec": imputer_saved_time,
            "Scaler Saved Sec": scaler_saved_time,
            "Feature Preprocessor Saved Sec": f_preproc_saved_time,
            "Saved %": saved_time_percent,
        })

    return pd.DataFrame({
        "Dataset": dataset_names,
        "Total Components": total_component,
        "Imputer Avoided Runs": imputer_avoided_runs,
        "Scaler Avoided Runs": scaler_avoided_runs,
        "Feature Preprocessor Avoided Runs": f_preproc_avoided_runs,
        "Avoided %": saved_c_runs_percent,
    })
    
    

In [30]:
# Time savings estimated from the mean run time of each component.
mean_savings = get_savings_dfs(func="mean", time=True)
mean_savings

Unnamed: 0,Dataset,Total Run Sec,Imputer Saved Sec,Scaler Saved Sec,Feature Preprocessor Saved Sec,Saved %
0,spambase,435.392286,14.941325,87.542124,201.173507,41.087513
1,sylvine,464.592113,12.70501,42.592884,125.302522,27.991709
2,bank-marketing,440.932985,10.804343,12.204872,287.137672,41.293463
3,phoneme,207.021385,7.801367,17.855415,83.446126,34.512662
4,kc1,136.203639,9.176886,17.530094,52.130594,36.661611
5,pc4,238.04228,9.265467,18.776653,55.359076,25.945836
6,wilt,245.151585,10.017337,15.840561,61.426557,26.256014
7,qsar-biodeg,132.103318,8.862667,17.664677,70.149435,42.257513
8,mozilla4,789.121826,11.654935,12.161654,962.625249,55.556546
9,steel-plates-fault,343.023263,9.510545,20.702597,62.321604,21.245103


In [31]:
# Time savings estimated from the median run time of each component.
median_savings = get_savings_dfs(func="median")
median_savings

Unnamed: 0,Dataset,Total Run Sec,Imputer Saved Sec,Scaler Saved Sec,Feature Preprocessor Saved Sec,Saved %
0,spambase,435.392286,14.941325,87.542124,191.336661,40.292802
1,sylvine,464.592113,12.70501,42.592884,103.020681,25.415935
2,bank-marketing,440.932985,10.804343,12.204872,276.952053,40.486381
3,phoneme,207.021385,7.801367,17.855415,72.127158,32.080785
4,kc1,136.203639,9.176886,17.530094,49.974578,36.020146
5,pc4,238.04228,9.265467,18.776653,59.077756,26.792748
6,wilt,245.151585,10.017337,15.840561,59.247367,25.769417
7,qsar-biodeg,132.103318,8.862667,17.664677,73.132408,43.000704
8,mozilla4,789.121826,11.654935,12.161654,907.367224,54.128975
9,steel-plates-fault,343.023263,9.510545,20.702597,63.257653,21.41399


In [32]:
# Time savings estimated from the geometric mean run time of each component.
g_mean_savings = get_savings_dfs(func="geometric_mean")
g_mean_savings

Unnamed: 0,Dataset,Total Run Sec,Imputer Saved Sec,Scaler Saved Sec,Feature Preprocessor Saved Sec,Saved %
0,spambase,435.392286,14.941325,87.542124,155.069028,37.167822
1,sylvine,464.592113,12.70501,42.592884,114.856793,26.806703
2,bank-marketing,440.932985,10.804343,12.204872,285.474792,41.163199
3,phoneme,207.021385,7.801367,17.855415,80.209978,33.835338
4,kc1,136.203639,9.176886,17.530094,48.215103,35.486952
5,pc4,238.04228,9.265467,18.776653,51.485995,25.042673
6,wilt,245.151585,10.017337,15.840561,60.73703,26.10274
7,qsar-biodeg,132.103318,8.862667,17.664677,64.192535,40.713839
8,mozilla4,789.121826,11.654935,12.161654,925.242931,54.600719
9,steel-plates-fault,343.023263,9.510545,20.702597,56.584958,20.193994


In [33]:
def get_n_failures_and_timeouts(ds):
    """Return the ``"failed"`` and ``"timeout"`` entries of a train summary.

    Args:
        ds: Dataset name; selects ``logs/<ds>_train_summary.json``.

    Returns:
        Tuple ``(failed, timeout)`` as stored in the summary file.
    """
    summary_path = f"binary_classfication_pipelines/logs/{ds}_train_summary.json"
    train_summary = load_json(summary_path)

    n_failed = train_summary["failed"]
    n_timeout = train_summary["timeout"]
    return n_failed, n_timeout


# Look up the failure / timeout counts once per dataset, in table order.
failure_and_timeout_counts = [
    get_n_failures_and_timeouts(ds) for ds in g_mean_savings["Dataset"]
]
failed = [n_failed for n_failed, _ in failure_and_timeout_counts]
timeout = [n_timeout for _, n_timeout in failure_and_timeout_counts]

g_mean_savings["N Failed"] = failed
g_mean_savings["N Timeout"] = timeout

# Move the two new columns directly behind the dataset name.
column_order = [
    "Dataset",
    "N Failed",
    "N Timeout",
    "Total Run Sec",
    "Imputer Saved Sec",
    "Scaler Saved Sec",
    "Feature Preprocessor Saved Sec",
    "Saved %",
]
g_mean_savings = g_mean_savings[column_order]


In [34]:
# Display the savings table with the failure / timeout columns added.
g_mean_savings

Unnamed: 0,Dataset,N Failed,N Timeout,Total Run Sec,Imputer Saved Sec,Scaler Saved Sec,Feature Preprocessor Saved Sec,Saved %
0,spambase,0,0,435.392286,14.941325,87.542124,155.069028,37.167822
1,sylvine,4,1,464.592113,12.70501,42.592884,114.856793,26.806703
2,bank-marketing,0,0,440.932985,10.804343,12.204872,285.474792,41.163199
3,phoneme,0,0,207.021385,7.801367,17.855415,80.209978,33.835338
4,kc1,0,0,136.203639,9.176886,17.530094,48.215103,35.486952
5,pc4,4,1,238.04228,9.265467,18.776653,51.485995,25.042673
6,wilt,34,1,245.151585,10.017337,15.840561,60.73703,26.10274
7,qsar-biodeg,0,0,132.103318,8.862667,17.664677,64.192535,40.713839
8,mozilla4,4,1,789.121826,11.654935,12.161654,925.242931,54.600719
9,steel-plates-fault,9,2,343.023263,9.510545,20.702597,56.584958,20.193994


In [35]:
# Index by dataset name (keeping the column) and bring the rows into the same
# order as dataset_info.
g_mean_savings = g_mean_savings.set_index("Dataset", drop=False).loc[dataset_info.index]

In [36]:
# Display the savings table after re-ordering it by dataset_info's index.
g_mean_savings

Unnamed: 0_level_0,Dataset,N Failed,N Timeout,Total Run Sec,Imputer Saved Sec,Scaler Saved Sec,Feature Preprocessor Saved Sec,Saved %
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
higgs,higgs,0,135,18074.673454,42.327037,132.516034,1237.420088,7.24723
numerai28.6,numerai28.6,0,127,16687.876151,25.60969,84.875034,1019.820722,6.343551
mozilla4,mozilla4,4,1,789.121826,11.654935,12.161654,925.242931,54.600719
eeg-eye-state,eeg-eye-state,34,1,1043.026864,9.819621,24.03152,726.421802,42.160097
bank-marketing,bank-marketing,0,0,440.932985,10.804343,12.204872,285.474792,41.163199
phoneme,phoneme,0,0,207.021385,7.801367,17.855415,80.209978,33.835338
sylvine,sylvine,4,1,464.592113,12.70501,42.592884,114.856793,26.806703
wilt,wilt,34,1,245.151585,10.017337,15.840561,60.73703,26.10274
spambase,spambase,0,0,435.392286,14.941325,87.542124,155.069028,37.167822
madelon,madelon,19,33,4916.87202,28.169284,313.551201,1742.265883,29.767584


In [38]:
def round_float(x):
    """Round ``x`` to two decimal places (used as a LaTeX column formatter)."""
    rounded = round(x, ndigits=2)
    return rounded

In [39]:
# Columns printed with two decimals in the LaTeX export.
to_round_cols = [
    'Imputer Saved Sec',
    'Scaler Saved Sec',
    'Feature Preprocessor Saved Sec',
    'Saved %',
    'Total Run Sec',
]

rounding_formatters = dict.fromkeys(to_round_cols, round_float)
print(g_mean_savings.to_latex(index=False, formatters=rounding_formatters))

\begin{tabular}{lrrrrrrr}
\toprule
Dataset & N Failed & N Timeout & Total Run Sec & Imputer Saved Sec & Scaler Saved Sec & Feature Preprocessor Saved Sec & Saved % \\
\midrule
higgs & 0 & 135 & 18074.67 & 42.33 & 132.52 & 1237.42 & 7.25 \\
numerai28.6 & 0 & 127 & 16687.88 & 25.61 & 84.88 & 1019.82 & 6.34 \\
mozilla4 & 4 & 1 & 789.12 & 11.65 & 12.16 & 925.24 & 54.6 \\
eeg-eye-state & 34 & 1 & 1043.03 & 9.82 & 24.03 & 726.42 & 42.16 \\
bank-marketing & 0 & 0 & 440.93 & 10.8 & 12.2 & 285.47 & 41.16 \\
phoneme & 0 & 0 & 207.02 & 7.8 & 17.86 & 80.21 & 33.84 \\
sylvine & 4 & 1 & 464.59 & 12.71 & 42.59 & 114.86 & 26.81 \\
wilt & 34 & 1 & 245.15 & 10.02 & 15.84 & 60.74 & 26.1 \\
spambase & 0 & 0 & 435.39 & 14.94 & 87.54 & 155.07 & 37.17 \\
madelon & 19 & 33 & 4916.87 & 28.17 & 313.55 & 1742.27 & 29.77 \\
ozone-level-8hr & 19 & 1 & 540.99 & 9.97 & 49.18 & 140.65 & 26.97 \\
kc1 & 0 & 0 & 136.2 & 9.18 & 17.53 & 48.22 & 35.49 \\
steel-plates-fault & 9 & 2 & 343.02 & 9.51 & 20.7 & 56.58 & 20.19 \\


In [40]:
from statistics import geometric_mean as gmean

# Print the geometric mean of every column except the dataset name and the
# failure / timeout counts.
for c in g_mean_savings.columns:
    if any(tag in c for tag in ("Dataset", "Failed", "Timeout")):
        continue
    print(c)
    print(round(gmean(g_mean_savings[c]), 2))
            

Total Run Sec
696.37
Imputer Saved Sec
12.79
Scaler Saved Sec
33.6
Feature Preprocessor Saved Sec
204.7
Saved %
26.51


In [41]:
# Avoided component executions (instead of saved seconds), mean aggregation.
get_savings_dfs(func="mean", time=False)

Unnamed: 0,Dataset,Total Components,Imputer Avoided Runs,Scaler Avoided Runs,Feature Preprocessor Avoided Runs,Avoided %
0,spambase,3268,1182,1008,987,97.215422
1,sylvine,3258,1177,1008,982,97.206875
2,bank-marketing,3268,1182,1008,987,97.215422
3,phoneme,3268,1182,1008,987,97.215422
4,kc1,3268,1182,1008,987,97.215422
5,pc4,3253,1177,1003,982,97.202582
6,wilt,3168,1147,978,952,97.127525
7,qsar-biodeg,3268,1182,1008,987,97.215422
8,mozilla4,3258,1177,1008,982,97.206875
9,steel-plates-fault,3241,1171,1003,976,97.192225


### 3. Accuracy Scores <a id="scores"></a>
[back to overview](#ov)


In [42]:
def get_n_pipelines_and_accuracy(ds_name, seed=42):
    """Collect pipeline counts and test accuracies of both systems.

    Args:
        ds_name: Dataset name used in the log and result paths.
        seed: Selects the ``run_<seed>`` directory of the SMAC output.

    Returns:
        Tuple ``(n_pipelines_cls_luigi, accuracy_cls_luigi, n_pipelines_askl,
        accuracy_askl)`` read from, in this order: ``"n_runs"`` of the
        CLS-Luigi train summary, ``"test_accuracy"`` of the CLS-Luigi test
        summary, ``"submitted_ta_runs"`` of the SMAC ``stats.json`` and
        ``"test_accuracy"`` of the auto-sklearn best-pipeline summary.
    """
    cls_luigi_train_summary = f"binary_classfication_pipelines/logs/{ds_name}_train_summary.json"
    cls_luigi_test_summary = f"binary_classfication_pipelines/logs/{ds_name}_test_summary.json"
    askl_smac_stats = f"askl/results/{ds_name}/smac3-output/run_{seed}/stats.json"
    askl_best_pipeline = f"askl/results/{ds_name}/best_pipeline_summary.json"

    n_pipelines_cls_luigi = load_json(cls_luigi_train_summary)["n_runs"]
    accuracy_cls_luigi = load_json(cls_luigi_test_summary)["test_accuracy"]
    n_pipelines_askl = load_json(askl_smac_stats)["submitted_ta_runs"]
    accuracy_askl = load_json(askl_best_pipeline)["test_accuracy"]

    return n_pipelines_cls_luigi, accuracy_cls_luigi, n_pipelines_askl, accuracy_askl
    

In [43]:
def get_winner(cls_luigi_accuracy, askl_accuracy, tolerance=0.0):
    """Name the system with the higher accuracy.

    Args:
        cls_luigi_accuracy: Test accuracy reached by CLS-Luigi.
        askl_accuracy: Test accuracy reached by auto-sklearn.
        tolerance: Absolute difference up to which the two accuracies count as
            a draw. The default ``0.0`` keeps the strict comparison, where only
            exactly equal accuracies are a draw; e.g. ``tolerance=0.01``
            reports a draw whenever the accuracies differ by at most 0.01.

    Returns:
        ``"CLS-Luigi"``, ``"AutoSklearn"`` or ``"draw"``.
    """
    if abs(cls_luigi_accuracy - askl_accuracy) <= tolerance:
        return "draw"
    if cls_luigi_accuracy > askl_accuracy:
        return "CLS-Luigi"
    if cls_luigi_accuracy < askl_accuracy:
        return "AutoSklearn"
    # Reached only when the values cannot be ordered (e.g. NaN).
    return "draw"

In [44]:
def get_n_pipelines_and_accuracies_df():
    """Build the accuracy comparison table for all ``dataset_names``.

    Returns:
        A DataFrame with one row per dataset and the columns ``Dataset``,
        ``# Pipelines CLS-Luigi``, ``# Pipelines AutoSklearn``,
        ``Accuracy CLS-Luigi``, ``Accuracy AutoSklearn`` and ``Winner``
        (the result of ``get_winner`` on the two accuracies).
    """
    # One (n_cls_luigi, acc_cls_luigi, n_askl, acc_askl) tuple per dataset.
    results = [get_n_pipelines_and_accuracy(ds) for ds in dataset_names]

    return pd.DataFrame({
        "Dataset": list(dataset_names),
        "# Pipelines CLS-Luigi": [n_cl for n_cl, _, _, _ in results],
        "# Pipelines AutoSklearn": [n_askl for _, _, n_askl, _ in results],
        "Accuracy CLS-Luigi": [acc_cl for _, acc_cl, _, _ in results],
        "Accuracy AutoSklearn": [acc_askl for _, _, _, acc_askl in results],
        "Winner": [get_winner(acc_cl, acc_askl) for _, acc_cl, _, acc_askl in results],
    })
        
# Accuracy comparison table; winners are decided on the unrounded accuracies.
scores_df = get_n_pipelines_and_accuracies_df()
scores_df
    

Unnamed: 0,Dataset,# Pipelines CLS-Luigi,# Pipelines AutoSklearn,Accuracy CLS-Luigi,Accuracy AutoSklearn,Winner
0,spambase,1183,71,0.93926,0.939262,AutoSklearn
1,sylvine,1183,205,0.94737,0.945419,CLS-Luigi
2,bank-marketing,1183,219,0.8242,0.830813,AutoSklearn
3,phoneme,1183,142,0.91497,0.885397,CLS-Luigi
4,kc1,1183,116,0.891,0.862559,CLS-Luigi
5,pc4,1183,191,0.91096,0.910959,CLS-Luigi
6,wilt,1183,227,0.98347,0.981405,CLS-Luigi
7,qsar-biodeg,1183,109,0.84906,0.811321,CLS-Luigi
8,mozilla4,1183,379,0.96334,0.949839,CLS-Luigi
9,steel-plates-fault,1183,221,1.0,0.994872,CLS-Luigi


In [45]:

# Round both accuracy columns to three decimals for presentation; the Winner
# column was computed before rounding and is left as is.
for accuracy_col in ("Accuracy CLS-Luigi", "Accuracy AutoSklearn"):
    scores_df[accuracy_col] = scores_df[accuracy_col].apply(lambda x: round(x, 3))
scores_df

Unnamed: 0,Dataset,# Pipelines CLS-Luigi,# Pipelines AutoSklearn,Accuracy CLS-Luigi,Accuracy AutoSklearn,Winner
0,spambase,1183,71,0.939,0.939,AutoSklearn
1,sylvine,1183,205,0.947,0.945,CLS-Luigi
2,bank-marketing,1183,219,0.824,0.831,AutoSklearn
3,phoneme,1183,142,0.915,0.885,CLS-Luigi
4,kc1,1183,116,0.891,0.863,CLS-Luigi
5,pc4,1183,191,0.911,0.911,CLS-Luigi
6,wilt,1183,227,0.983,0.981,CLS-Luigi
7,qsar-biodeg,1183,109,0.849,0.811,CLS-Luigi
8,mozilla4,1183,379,0.963,0.95,CLS-Luigi
9,steel-plates-fault,1183,221,1.0,0.995,CLS-Luigi


In [46]:

def highlight_winners_for_latex(scores_df):
    """Wrap the winning accuracy of every row in bold, underlined LaTeX markup.

    Args:
        scores_df: DataFrame with the columns ``Winner``,
            ``Accuracy CLS-Luigi`` and ``Accuracy AutoSklearn``.

    Returns:
        The result of applying the highlighting row by row. Rows whose
        ``Winner`` is neither ``"CLS-Luigi"`` nor ``"AutoSklearn"`` are
        returned unchanged.
    """
    accuracy_column_of = {
        "CLS-Luigi": "Accuracy CLS-Luigi",
        "AutoSklearn": "Accuracy AutoSklearn",
    }

    def _emphasise_winner(row):
        target = accuracy_column_of.get(row["Winner"])
        if target is not None:
            row[target] = f"\\textbf{{\\underline{{{row[target]}}}}}"
        return row

    return scores_df.apply(_emphasise_winner, axis=1)

# After this cell the winning accuracy of each row is a LaTeX string.
scores_df = highlight_winners_for_latex(scores_df=scores_df)
scores_df

Unnamed: 0,Dataset,# Pipelines CLS-Luigi,# Pipelines AutoSklearn,Accuracy CLS-Luigi,Accuracy AutoSklearn,Winner
0,spambase,1183,71,0.939,\textbf{\underline{0.939}},AutoSklearn
1,sylvine,1183,205,\textbf{\underline{0.947}},0.945,CLS-Luigi
2,bank-marketing,1183,219,0.824,\textbf{\underline{0.831}},AutoSklearn
3,phoneme,1183,142,\textbf{\underline{0.915}},0.885,CLS-Luigi
4,kc1,1183,116,\textbf{\underline{0.891}},0.863,CLS-Luigi
5,pc4,1183,191,\textbf{\underline{0.911}},0.911,CLS-Luigi
6,wilt,1183,227,\textbf{\underline{0.983}},0.981,CLS-Luigi
7,qsar-biodeg,1183,109,\textbf{\underline{0.849}},0.811,CLS-Luigi
8,mozilla4,1183,379,\textbf{\underline{0.963}},0.95,CLS-Luigi
9,steel-plates-fault,1183,221,\textbf{\underline{1.0}},0.995,CLS-Luigi


In [47]:
# The winner is now visible through the highlighting, so the helper column is
# removed before the LaTeX export.
scores_df.drop(columns="Winner", inplace=True)

In [48]:
# Index the table by dataset name while keeping "Dataset" as a regular column.
scores_df.set_index("Dataset", drop=False, inplace=True)

scores_df

Unnamed: 0_level_0,Dataset,# Pipelines CLS-Luigi,# Pipelines AutoSklearn,Accuracy CLS-Luigi,Accuracy AutoSklearn
Dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
spambase,spambase,1183,71,0.939,\textbf{\underline{0.939}}
sylvine,sylvine,1183,205,\textbf{\underline{0.947}},0.945
bank-marketing,bank-marketing,1183,219,0.824,\textbf{\underline{0.831}}
phoneme,phoneme,1183,142,\textbf{\underline{0.915}},0.885
kc1,kc1,1183,116,\textbf{\underline{0.891}},0.863
pc4,pc4,1183,191,\textbf{\underline{0.911}},0.911
wilt,wilt,1183,227,\textbf{\underline{0.983}},0.981
qsar-biodeg,qsar-biodeg,1183,109,\textbf{\underline{0.849}},0.811
mozilla4,mozilla4,1183,379,\textbf{\underline{0.963}},0.95
steel-plates-fault,steel-plates-fault,1183,221,\textbf{\underline{1.0}},0.995


In [49]:
# Bring the rows into the same order as dataset_info (indexed by dataset name).
scores_df = scores_df.loc[dataset_info.index]

scores_df

Unnamed: 0_level_0,Dataset,# Pipelines CLS-Luigi,# Pipelines AutoSklearn,Accuracy CLS-Luigi,Accuracy AutoSklearn
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
higgs,higgs,1183,120,0.731,\textbf{\underline{0.733}}
numerai28.6,numerai28.6,1183,233,\textbf{\underline{0.515}},0.512
mozilla4,mozilla4,1183,379,\textbf{\underline{0.963}},0.95
eeg-eye-state,eeg-eye-state,1183,192,\textbf{\underline{0.977}},0.963
bank-marketing,bank-marketing,1183,219,0.824,\textbf{\underline{0.831}}
phoneme,phoneme,1183,142,\textbf{\underline{0.915}},0.885
sylvine,sylvine,1183,205,\textbf{\underline{0.947}},0.945
wilt,wilt,1183,227,\textbf{\underline{0.983}},0.981
spambase,spambase,1183,71,0.939,\textbf{\underline{0.939}}
madelon,madelon,1183,746,\textbf{\underline{0.915}},0.896


In [50]:
def rount_func(x):
    """Round ``x`` to three decimals; strings are passed through untouched.

    Strings occur where an accuracy was already replaced by LaTeX markup.
    """
    return x if isinstance(x, str) else round(x, 3)

In [51]:
# Both accuracy columns share the same formatter.
accuracy_formatters = dict.fromkeys(
    ["Accuracy CLS-Luigi", "Accuracy AutoSklearn"], rount_func
)
print(scores_df.to_latex(index=False, formatters=accuracy_formatters))

\begin{tabular}{lrrll}
\toprule
Dataset & # Pipelines CLS-Luigi & # Pipelines AutoSklearn & Accuracy CLS-Luigi & Accuracy AutoSklearn \\
\midrule
higgs & 1183 & 120 & 0.731 & \textbf{\underline{0.733}} \\
numerai28.6 & 1183 & 233 & \textbf{\underline{0.515}} & 0.512 \\
mozilla4 & 1183 & 379 & \textbf{\underline{0.963}} & 0.95 \\
eeg-eye-state & 1183 & 192 & \textbf{\underline{0.977}} & 0.963 \\
bank-marketing & 1183 & 219 & 0.824 & \textbf{\underline{0.831}} \\
phoneme & 1183 & 142 & \textbf{\underline{0.915}} & 0.885 \\
sylvine & 1183 & 205 & \textbf{\underline{0.947}} & 0.945 \\
wilt & 1183 & 227 & \textbf{\underline{0.983}} & 0.981 \\
spambase & 1183 & 71 & 0.939 & \textbf{\underline{0.939}} \\
madelon & 1183 & 746 & \textbf{\underline{0.915}} & 0.896 \\
ozone-level-8hr & 1183 & 163 & \textbf{\underline{0.949}} & 0.941 \\
kc1 & 1183 & 116 & \textbf{\underline{0.891}} & 0.863 \\
steel-plates-fault & 1183 & 221 & \textbf{\underline{1.0}} & 0.995 \\
pc4 & 1183 & 191 & \textbf{\underlin