In [24]:
import pandas as pd
import warnings
import json 
from os.path import join as pjoin
from os import listdir
from statistics import median, geometric_mean


warnings.filterwarnings("ignore")

In [25]:
# Result directories of the baseline frameworks. Each one is expected to hold
# one sub-folder per dataset (see how the paths are joined with the dataset
# name further down in the notebook).

# auto-sklearn 1 / 2, without and with ensembling
ASKL1 = "/home/hadi/cls-luigi_paper/askl/askl1_no_ens_results"
ASKL1_ENS = "/home/hadi/cls-luigi_paper/askl/askl1_ens_results"
ASKL2 = "/home/hadi/cls-luigi_paper/askl/askl2_no_ens_results"
ASKL2_ENS = "/home/hadi/cls-luigi_paper/askl/askl2_ens_results"

# AutoGluon
AG = "/home/hadi/cls-luigi_paper/ag/ag_results"

# Display name -> result directory.
# NOTE: "AutoGloun" (sic) is kept as-is because the same key is used by
# results_dict and color_rows elsewhere in this notebook.
framework_results_paths = {
    "AutoGloun": AG,
    "AutoSklearn 1 (No Ensembling)": ASKL1,
    "AutoSklearn 2 (No Ensembling)": ASKL2,
    "AutoSklearn 1 (With Ensembling)": ASKL1_ENS,
    "AutoSklearn 2 (With Ensembling)": ASKL2_ENS,
}


# Numeric ids of the benchmark datasets (presumably OpenML task ids -- TODO confirm).
# NOTE: the order here differs from `dataset_names`, so the two lists must not
# be zipped together; no cell visible in this notebook reads `ds_ids`.
ds_ids = [
    9967,    # steel-plates-fault
    9957,    # qsar-biodeg
    9952,    # phoneme
    9978,    # ozone-level-8hr
    146820,  # wilt
    3899,    # mozilla4
    9983,    # eeg-eye-state
    359962,  # kc1 (classification)
    359958,  # pc4 (classification)
    361066,  # bank-marketing (classification)
    359972,  # sylvine (classification)
    9976,    # madelon
    167120,  # numerai28.6
    146606,  # higgs
    43,      # spambase
]



# Dataset names in the order used for every results table in this notebook.
# Each name is also the sub-folder / file-name stem used when loading results.
dataset_names = [
    "higgs",
    "numerai28.6",
    "mozilla4",            # exists in autosklearn
    "eeg-eye-state",       # exists in autosklearn
    "bank-marketing",
    "phoneme",
    "sylvine",
    "wilt",                # exists in autosklearn
    "spambase",            # exists in autosklearn
    "madelon",
    "ozone-level-8hr",     # exists in autosklearn
    "kc1",                 # exists in autosklearn
    "steel-plates-fault",  # exists in autosklearn
    "pc4",                 # exists in autosklearn
    "qsar-biodeg",         # exists in autosklearn
]

In [26]:
def load_json(path):
    """Read the JSON document stored at ``path`` and return the decoded object.

    Parameters
    ----------
    path : str or path-like
        Location of the JSON file.

    Returns
    -------
    object
        Whatever ``json.load`` produces for the file content (a dict for the
        summary files read in this notebook).

    Raises
    ------
    OSError
        If the file cannot be opened.
    json.JSONDecodeError
        If the content is not valid JSON.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's locale default encoding.
    with open(path, "r", encoding="utf-8") as json_file:
        return json.load(json_file)

In [27]:
# Column-oriented accumulator for the test accuracies: one list per column,
# filled in `dataset_names` order. The key order fixes the column order of the
# DataFrame built from it later. The "CLS-Luigi" column is filled by a
# separate cell.
results_dict = {
    column: []
    for column in (
        "dataset_name",
        "AutoSklearn 1 (No Ensembling)",
        "AutoSklearn 1 (With Ensembling)",
        "AutoSklearn 2 (No Ensembling)",
        "AutoSklearn 2 (With Ensembling)",
        "AutoGloun",
        "CLS-Luigi",
    )
}

for ds in dataset_names:
    results_dict["dataset_name"].append(ds)
    for framework, _path in framework_results_paths.items():
        # <framework results dir>/<dataset>/best_pipeline_summary.json
        summary = load_json(pjoin(_path, ds, "best_pipeline_summary.json"))
        results_dict[framework].append(summary["test_accuracy"])


In [28]:
# Build the CLS-Luigi column in one go and *assign* it instead of appending to
# the existing list: appending made this cell non-idempotent (running it twice
# left the column twice as long as the others, which breaks the DataFrame
# construction that follows).
results_dict["CLS-Luigi"] = [
    load_json(
        f"/home/hadi/cls-luigi_paper/binary_classfication_pipelines/logs/{ds}_test_summary.json"
    )["test_accuracy"]
    for ds in dataset_names
]

In [29]:
import numpy as np

# One row per dataset, one column per key of `results_dict`.
results_df = pd.DataFrame(results_dict)

def get_winner(row):
    """Return the label of the best-scoring framework in one results row.

    The first entry of ``row`` is treated as the dataset label and skipped.
    Of the remaining entries only real numbers are considered scores, so
    non-numeric columns (for example a ``winner`` column left over from an
    earlier run of the cell) and NaN values are ignored instead of breaking
    the comparison.

    Parameters
    ----------
    row : pandas.Series
        One row as passed by ``DataFrame.apply(..., axis=1)``; its index holds
        the column names.

    Returns
    -------
    object
        The index label of the single highest score, or the string
        ``"Draw [<labels>]"`` listing, in column order, every label that
        shares the highest score.

    Raises
    ------
    ValueError
        If no numeric score is found after the first entry.
    """
    candidates = row.iloc[1:]

    # Keep genuine numbers only; bool is a subclass of int and is excluded.
    is_score = np.array(
        [
            isinstance(value, (int, float, np.integer, np.floating))
            and not isinstance(value, bool)
            for value in candidates
        ],
        dtype=bool,
    )
    scores = candidates[is_score].astype(float).dropna()
    if scores.empty:
        raise ValueError("row contains no numeric scores to compare")

    best_score = scores.max()
    # Exact equality is intentional: a draw means identical reported scores.
    leaders = scores.index[scores == best_score].tolist()
    if len(leaders) > 1:
        return f"Draw {leaders}"
    return leaders[0]

# Drop a previously computed "winner" column before applying get_winner so
# that re-running this cell does not feed the old labels back in as scores.
results_df["winner"] = results_df.drop(columns="winner", errors="ignore").apply(get_winner, axis=1)


In [30]:
def color_rows(row):
    """Return one CSS style string per cell, colouring the row by its winner.

    The winner label is matched by prefix, so both the "(No Ensembling)" and
    "(With Ensembling)" variants of an AutoSklearn version get that version's
    colour. Exact labels such as ``'AutoSklearn 1'`` still match. Rows whose
    winner matches no known framework (for example ``"Draw [...]"``) are left
    unstyled.

    Parameters
    ----------
    row : mapping with a ``'winner'`` entry (a ``pandas.Series`` when called
        through ``Styler.apply(..., axis=1)``)

    Returns
    -------
    list of str
        ``len(row)`` identical style strings (empty strings for no styling).
    """
    colors_by_prefix = (
        ("AutoGloun", "darkcyan"),
        ("AutoSklearn 2", "darkorange"),
        ("AutoSklearn 1", "darkmagenta"),
        ("CLS-Luigi", "darkgreen"),
    )
    winner = str(row['winner'])
    for prefix, color in colors_by_prefix:
        if winner.startswith(prefix):
            return [f'background-color: {color}'] * len(row)
    return [''] * len(row)

# Colour every row by its winning framework: with axis=1 the Styler calls
# color_rows once per row and uses the returned list as per-cell CSS.
styled_df = results_df.style.apply(color_rows, axis=1)

styled_df

Unnamed: 0,dataset_name,AutoSklearn 1 (No Ensembling),AutoSklearn 1 (With Ensembling),AutoSklearn 2 (No Ensembling),AutoSklearn 2 (With Ensembling),AutoGloun,CLS-Luigi,winner
0,higgs,0.733401,0.735849,0.735441,0.740745,0.747068,0.73055,AutoGloun
1,numerai28.6,0.511836,0.512874,0.519207,0.518272,0.517546,0.51537,AutoSklearn 2 (No Ensembling)
2,mozilla4,0.951125,0.949196,0.954341,0.952412,0.958199,0.96334,CLS-Luigi
3,eeg-eye-state,0.963284,0.96996,0.960614,0.977303,0.987316,0.9773,AutoGloun
4,bank-marketing,0.830813,0.835539,0.824197,0.825142,0.830813,0.8242,AutoSklearn 1 (With Ensembling)
5,phoneme,0.889094,0.902033,0.902033,0.907579,0.920518,0.91497,AutoGloun
6,sylvine,0.947368,0.94152,0.931774,0.937622,0.949318,0.94737,AutoGloun
7,wilt,0.981405,0.985537,0.985537,0.981405,0.983471,0.98347,"Draw ['AutoSklearn 1 (With Ensembling)', 'AutoSklearn 2 (No Ensembling)']"
8,spambase,0.939262,0.943601,0.94577,0.939262,0.937093,0.93926,AutoSklearn 2 (No Ensembling)
9,madelon,0.896154,0.907692,0.888462,0.884615,0.873077,0.91538,CLS-Luigi


In [32]:
# Sanity check: `.style.apply(...)` returns a pandas Styler, not a DataFrame.
type(styled_df)

pandas.io.formats.style.Styler

In [38]:

# Same comparison with AutoGluon left out: remove its score column and the
# previously computed winner, then determine the winner among the rest.
results_no_ag = results_df.drop(columns=["AutoGloun", "winner"])

results_no_ag["winner"] = results_no_ag.apply(get_winner, axis=1)
results_no_ag

Unnamed: 0,dataset_name,AutoSklearn 1 (No Ensembling),AutoSklearn 1 (With Ensembling),AutoSklearn 2 (No Ensembling),AutoSklearn 2 (With Ensembling),CLS-Luigi,winner
0,higgs,0.733401,0.735849,0.735441,0.740745,0.73055,AutoSklearn 2 (With Ensembling)
1,numerai28.6,0.511836,0.512874,0.519207,0.518272,0.51537,AutoSklearn 2 (No Ensembling)
2,mozilla4,0.951125,0.949196,0.954341,0.952412,0.96334,CLS-Luigi
3,eeg-eye-state,0.963284,0.96996,0.960614,0.977303,0.9773,AutoSklearn 2 (With Ensembling)
4,bank-marketing,0.830813,0.835539,0.824197,0.825142,0.8242,AutoSklearn 1 (With Ensembling)
5,phoneme,0.889094,0.902033,0.902033,0.907579,0.91497,CLS-Luigi
6,sylvine,0.947368,0.94152,0.931774,0.937622,0.94737,CLS-Luigi
7,wilt,0.981405,0.985537,0.985537,0.981405,0.98347,"Draw ['AutoSklearn 1 (With Ensembling)', 'Auto..."
8,spambase,0.939262,0.943601,0.94577,0.939262,0.93926,AutoSklearn 2 (No Ensembling)
9,madelon,0.896154,0.907692,0.888462,0.884615,0.91538,CLS-Luigi


In [None]:
# How often each label occurs in the "winner" column (draws are counted under
# their own "Draw [...]" label).
(
    results_df
    .value_counts("winner")
    .rename_axis('Framework')
    .to_frame('winnings')
)

In [None]:
import pickle 
def get_n_pipelines_ag(path):
    """Return the number of rows in the leaderboard of a pickled object.

    The file at ``path`` is unpickled and the first dimension of the object's
    ``leaderboard()`` result is returned.

    NOTE(review): ``pickle.load`` executes arbitrary code contained in the
    file -- only use this on result files you produced yourself.
    """
    with open(path, "rb") as pickled_file:
        predictor = pickle.load(pickled_file)

    leaderboard = predictor.leaderboard()
    return leaderboard.shape[0]
    
    
    
def get_n_pipelines(ds_name, seed=42):
    """Collect how many pipelines/models each framework evaluated on a dataset.

    All files are located through the absolute result locations used by the
    rest of the notebook (``ASKL1``, ``ASKL2``, ``ASKL1_ENS``, ``ASKL2_ENS``,
    ``AG`` and the CLS-Luigi log folder), so the result no longer depends on
    the kernel's current working directory.

    Parameters
    ----------
    ds_name : str
        Dataset name; used as sub-folder / file-name stem.
    seed : int, default 42
        Selects the ``run_<seed>`` folder of the auto-sklearn output.

    Returns
    -------
    tuple
        ``(askl1, askl1_ens, askl2, askl2_ens, cls_luigi, autogluon)`` where
        the auto-sklearn entries are the ``"submitted_ta_runs"`` values of the
        respective ``stats.json``, ``cls_luigi`` is ``"n_runs"`` of the
        training summary, and ``autogluon`` is the leaderboard row count
        returned by ``get_n_pipelines_ag``.
    """

    def _submitted_runs(results_dir):
        # <results_dir>/<dataset>/smac3-output/run_<seed>/stats.json
        stats_path = pjoin(results_dir, ds_name, "smac3-output", f"run_{seed}", "stats.json")
        return load_json(stats_path)["submitted_ta_runs"]

    n_pipelines_cls_luigi = load_json(
        f"/home/hadi/cls-luigi_paper/binary_classfication_pipelines/logs/{ds_name}_train_summary.json"
    )["n_runs"]

    n_pipelines_askl1 = _submitted_runs(ASKL1)
    n_pipelines_askl2 = _submitted_runs(ASKL2)
    n_pipelines_askl1_ens = _submitted_runs(ASKL1_ENS)
    n_pipelines_askl2_ens = _submitted_runs(ASKL2_ENS)
    n_pipelines_ag = get_n_pipelines_ag(pjoin(AG, ds_name, "askl_obj.pkl"))

    return (
        n_pipelines_askl1,
        n_pipelines_askl1_ens,
        n_pipelines_askl2,
        n_pipelines_askl2_ens,
        n_pipelines_cls_luigi,
        n_pipelines_ag,
    )



# Smoke test on a single dataset (default seed=42) before looping over all of them.
get_n_pipelines("kc1")

In [20]:
# Column-oriented accumulator for the per-dataset pipeline/model counts.
# The count columns are listed in the same order in which get_n_pipelines
# returns its values, so they can be filled positionally.
n_pipelines_dict = {
    "dataset": [],
    "N pipelines AutoSklearn 1 (No Ensembling)": [],
    "N pipelines AutoSklearn 1 (With Ensembling)": [],
    "N pipelines AutoSklearn 2 (No Ensembling)": [],
    "N pipelines AutoSklearn 2 (With Ensembling)": [],
    "N pipelines CLS-Luigi": [],
    "N models AutoGluon": [],
}

for ds in dataset_names:
    # Fetch all counts first so nothing is appended if loading fails.
    counts = get_n_pipelines(ds)

    n_pipelines_dict["dataset"].append(ds)
    count_columns = list(n_pipelines_dict)[1:]
    for column, count in zip(count_columns, counts):
        n_pipelines_dict[column].append(count)

df = pd.DataFrame(n_pipelines_dict)

In [None]:
# Display the per-dataset pipeline/model counts built from n_pipelines_dict.
df