In [1]:
import os
import pickle

import pandas as pd

from utils.io_methods import load_json


In [2]:
def load_autogloun_score(path):
    """Read a score file and return its last token as a float.

    The file's text is split on whitespace and the final token is parsed,
    so a file containing ``"accuracy: 0.93"`` yields ``0.93``.

    Args:
        path: Path of the text file to read.

    Returns:
        float: The value parsed from the last whitespace-separated token.

    Raises:
        ValueError: If the file holds no tokens, or the last token is not a
            valid float literal.
        OSError: If the file cannot be opened.
    """
    with open(path, "r") as score_file:
        # split() without an argument splits on any run of whitespace and
        # never produces empty tokens, so a trailing space or newline after
        # the number (or a tab separator) cannot become the "last token".
        tokens = score_file.read().split()

    if not tokens:
        raise ValueError(f"no score found in {path!r}")

    return float(tokens[-1])


In [3]:
# Names of the datasets to compare, in the order they are iterated and tabulated.
datasets = [
    "spambase",            # exists in autosklearn
    "sylvine",
    "bank-marketing",
    "phoneme",
    "kc1",                 # exists in autosklearn
    "pc4",                 # exists in autosklearn
    "wilt",                # exists in autosklearn
    "qsar-biodeg",         # exists in autosklearn
    "mozilla4",            # exists in autosklearn
    "steel-plates-fault",  # exists in autosklearn
    "ozone-level-8hr",     # exists in autosklearn
    "eeg-eye-state",       # exists in autosklearn
    "madelon",
    "numerai28.6",
    "higgs",
]

In [4]:
# Column-oriented accumulator: one list of test scores per tool, filled in the
# same order as `datasets` so the columns stay aligned row by row.
scores = {"dataset": datasets, "CLS-Luigi": [], "AutoSklearn": [], "AutoGluon": []}

for ds in datasets:
    # CLS-Luigi: "test_accuracy" entry of the per-dataset test summary JSON.
    cls_luigi_summary = load_json(f"binary_classfication_pipelines/logs/{ds}_test_summary.json")
    cls_luigi_score = cls_luigi_summary["test_accuracy"]
    scores["CLS-Luigi"].append(cls_luigi_score)

    # AutoSklearn: "test_accuracy" entry of the best-pipeline summary JSON.
    askl_summary = load_json(f"askl/results/{ds}/best_pipeline_summary.json")
    askl_score = askl_summary["test_accuracy"]
    scores["AutoSklearn"].append(askl_score)

    # AutoGluon: score parsed from the plain-text score file.
    ag_score = load_autogloun_score(f"ag/{ds}/score.txt")
    scores["AutoGluon"].append(ag_score)
    


In [5]:
# Turn the collected columns into a table: one row per dataset, one column per tool.
scores_df = pd.DataFrame(scores)

# Bare last expression so the notebook renders the table.
scores_df

Unnamed: 0,dataset,CLS-Luigi,AutoSklearn,AutoGluon
0,spambase,0.93926,0.939262,0.937093
1,sylvine,0.94737,0.945419,0.931774
2,bank-marketing,0.8242,0.830813,0.827032
3,phoneme,0.91497,0.885397,0.909427
4,kc1,0.891,0.862559,0.876777
5,pc4,0.91096,0.910959,0.910959
6,wilt,0.98347,0.981405,0.983471
7,qsar-biodeg,0.84906,0.811321,0.886792
8,mozilla4,0.96334,0.949839,0.952412
9,steel-plates-fault,1.0,0.994872,1.0


In [6]:
def ag_wins(row):
    """Return True when AutoGluon strictly beats both other tools on a row.

    Args:
        row: Mapping-like object indexable by the keys "AutoGluon",
            "AutoSklearn" and "CLS-Luigi".

    Returns:
        bool: True only if the "AutoGluon" value is strictly greater than both
        the "AutoSklearn" and the "CLS-Luigi" values; a tie with either one
        counts as not winning.
    """
    # Guard clause: no need to look at CLS-Luigi once AutoSklearn is not beaten.
    beats_autosklearn = row["AutoGluon"] > row["AutoSklearn"]
    if not beats_autosklearn:
        return False

    beats_cls_luigi = row["AutoGluon"] > row["CLS-Luigi"]
    return True if beats_cls_luigi else False
    
    
# Add a boolean column marking the rows where AutoGluon strictly beats both
# other tools (ties count as "no win").
# NOTE(review): in the displayed table the CLS-Luigi values carry fewer decimals
# than the other columns (e.g. wilt: 0.98347 vs 0.983471), so some strict
# "wins" may be rounding artefacts — confirm the stored precision.
scores_df["AutoGluon_wins"] = scores_df.apply(ag_wins, axis="columns")

In [7]:
# Display the table again, now including the "AutoGluon_wins" column.
scores_df

Unnamed: 0,dataset,CLS-Luigi,AutoSklearn,AutoGluon,AutoGluon_wins
0,spambase,0.93926,0.939262,0.937093,False
1,sylvine,0.94737,0.945419,0.931774,False
2,bank-marketing,0.8242,0.830813,0.827032,False
3,phoneme,0.91497,0.885397,0.909427,False
4,kc1,0.891,0.862559,0.876777,False
5,pc4,0.91096,0.910959,0.910959,False
6,wilt,0.98347,0.981405,0.983471,True
7,qsar-biodeg,0.84906,0.811321,0.886792,True
8,mozilla4,0.96334,0.949839,0.952412,False
9,steel-plates-fault,1.0,0.994872,1.0,False


In [8]:
# Pickled AutoGluon artefact for the bank-marketing dataset. The path is given
# relative to the working directory, like the other result paths in this
# notebook ("ag/{ds}/score.txt"), rather than as a user-specific absolute path.
# NOTE(review): assumes the notebook is run from the project root — confirm.
AG_PICKLE_PATH = "ag/bank-marketing/ag.pkl"

# SECURITY: pickle.load can execute arbitrary code embedded in the file; only
# load artefacts that were produced by this project.
with open(AG_PICKLE_PATH, "rb") as pkl_file:
    ag = pickle.load(pkl_file)

  from .autonotebook import tqdm as notebook_tqdm


In [9]:
# Show the leaderboard of the unpickled `ag` object; per the output below it
# lists one row per model with its validation score and fit/predict timings.
ag.leaderboard()

Unnamed: 0,model,score_val,eval_metric,pred_time_val,fit_time,pred_time_val_marginal,fit_time_marginal,stack_level,can_infer,fit_order
0,WeightedEnsemble_L2,0.813445,accuracy,0.340604,63.707998,0.007228,2.228731,2,True,35
1,LightGBM_BAG_L1,0.81271,accuracy,0.078744,8.096107,0.078744,8.096107,1,True,4
2,CatBoost_r13_BAG_L1,0.811555,accuracy,0.017871,27.864878,0.017871,27.864878,1,True,25
3,CatBoost_r177_BAG_L1,0.810714,accuracy,0.011088,13.197829,0.011088,13.197829,1,True,14
4,CatBoost_r9_BAG_L1,0.810609,accuracy,0.033088,32.690435,0.033088,32.690435,1,True,18
5,CatBoost_r137_BAG_L1,0.810399,accuracy,0.01253,13.475365,0.01253,13.475365,1,True,23
6,LightGBMXT_BAG_L1,0.810189,accuracy,0.207235,9.673864,0.207235,9.673864,1,True,3
7,XGBoost_BAG_L1,0.809874,accuracy,0.038009,8.042691,0.038009,8.042691,1,True,11
8,CatBoost_r50_BAG_L1,0.809559,accuracy,0.021098,13.413903,0.021098,13.413903,1,True,33
9,LightGBM_r188_BAG_L1,0.809559,accuracy,0.326863,11.585895,0.326863,11.585895,1,True,27
