In [None]:
# install interpret if not already installed
try:
    import interpret
except ModuleNotFoundError:
    !pip install -U --quiet scikit-learn xgboost interpret-core

In [None]:
# install powerlift if not already installed
try:
    import powerlift
except ModuleNotFoundError:
    !pip install -U --quiet powerlift[datasets,postgres]

In [None]:
def trial_filter(task):
    """Return the names of the methods that should be benchmarked on ``task``.

    An empty list is returned (the task is skipped) when the task's
    ``n_rows`` measure is outside the allowed range or when the task name is
    on the exclusion list for its origin. Otherwise both benchmarked methods
    are returned.

    Raises:
        Exception: if ``task.origin`` is neither "openml" nor "pmlb".
    """
    row_floor = 1
    row_ceiling = 1000000000000

    # Size limits are checked first, before the origin is even looked at.
    if task.scalar_measure("n_rows") < row_floor:
        return []
    if task.scalar_measure("n_rows") > row_ceiling:
        return []

    # Per-origin names to skip; both lists are currently empty.
    # OpenML names that have been excluded in earlier runs, kept for reference:
    #   'isolet', 'Devnagari-Script', 'CIFAR_10', 'Fashion-MNIST', 'mfeat-pixel',
    #   'Bioresponse', 'mfeat-factors', 'cnae-9', "Internet-Advertisements",
    #   'har', 'mnist_784'
    skip_by_origin = {
        "openml": set(),
        "pmlb": set(),
    }

    if task.origin not in skip_by_origin:
        raise Exception(f"Unrecognized task origin {task.origin}")

    if task.name in skip_by_origin[task.origin]:
        return []

    return ["xgboost-base", "ebm-base"]

In [None]:
def trial_runner(trial):
    """Fit and score one (task, method) pair of the benchmark.

    The task's data is split 70/30 into train and test sets (stratified by
    label for classification), the estimator selected by ``trial.method.name``
    is fitted on the training set, and timings and metrics are reported
    through ``trial.log``:

      * ``fit_time`` and ``predict_time`` in seconds
      * binary:      ``auc`` and ``log_loss``
      * multiclass:  ``multi_auc`` (weighted one-vs-rest) and ``cross_entropy``
      * regression:  ``rmsdiqr``, the test RMSE divided by the interquartile
        range of the training targets

    Class labels are encoded as integer codes for classification. Regression
    targets are left as their original numeric values.

    Args:
        trial: the trial object handed in by the benchmark runner. This
            function reads ``trial.task`` (``data``, ``problem``, ``origin``,
            ``name``), ``trial.method.name`` and calls ``trial.log``.

    Raises:
        RuntimeError: if the method name or the problem type is not handled.
    """
    seed = 42
    extra_params = {}
    # extra_params = {"interactions":0, "max_rounds":5}

    # Imports are deliberately local to the function, as in the original cell.
    from time import time
    import warnings

    import numpy as np
    import pandas as pd
    from interpret.glassbox import ExplainableBoostingClassifier, ExplainableBoostingRegressor
    from sklearn.metrics import roc_auc_score, root_mean_squared_error, log_loss
    from sklearn.model_selection import train_test_split
    from xgboost import XGBClassifier, XGBRegressor

    problem = trial.task.problem
    method_name = trial.method.name
    is_classification = problem in ["binary", "multiclass"]
    if not is_classification and problem != "regression":
        # Fail early and clearly; otherwise the first symptom would be a
        # NameError on the never-assigned estimator further down.
        raise RuntimeError(f"Problem type unavailable for {problem}")

    X, y, meta = trial.task.data(["X", "y", "meta"])

    # TODO: move this into powerlift
    # Present every non-numeric feature as an unordered categorical.
    for col_name in X.columns:
        col = X[col_name]
        if col.dtype.name == 'object':
            X[col_name] = col.astype(pd.CategoricalDtype(ordered=False))
        elif col.dtype.name == 'category' and col.cat.ordered:
            X[col_name] = col.cat.as_unordered()

    if is_classification:
        # Encode class labels as integer codes 0..k-1.
        _, y = np.unique(np.ravel(y), return_inverse=True)
        # use stratified, otherwise eval can fail if one of the classes is not in the training set
        stratification = y
    else:
        # Keep the real target values: encoding them with np.unique would
        # replace every target by its rank among the unique values.
        y = np.asarray(y, dtype=np.float64).ravel()
        stratification = None

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, stratify=stratification, random_state=seed
    )

    # Specify method
    eval_fn2 = None
    if is_classification:
        if method_name == "xgboost-base":
            est = XGBClassifier(enable_categorical=True)
        elif method_name == "ebm-base":
            est = ExplainableBoostingClassifier(**extra_params)
        else:
            raise RuntimeError(f"Method unavailable for {trial.method.name}")

        eval_fn1 = roc_auc_score
        eval_fn2 = log_loss
        if problem == "binary":
            # Score with the probability of the positive class only.
            predict_fn = lambda x: est.predict_proba(x)[:, 1]
            eval_name1 = "auc"
            eval_params1 = {}
            eval_name2 = "log_loss"
        else:  # multiclass
            predict_fn = lambda x: est.predict_proba(x)
            eval_name1 = "multi_auc"
            eval_params1 = {"average": "weighted", "multi_class": "ovr"}
            eval_name2 = "cross_entropy"
    else:  # regression
        if method_name == "xgboost-base":
            est = XGBRegressor(enable_categorical=True)
        elif method_name == "ebm-base":
            est = ExplainableBoostingRegressor(**extra_params)
        else:
            raise RuntimeError(f"Method unavailable for {trial.method.name}")

        # Normalising RMSE by the training IQR makes scores comparable across
        # datasets whose targets have different scales.
        q75, q25 = np.percentile(y_train, [75, 25])
        interquartile_range = q75 - q25

        predict_fn = lambda x: est.predict(x)
        eval_name1 = "rmsdiqr"
        eval_fn1 = lambda *args, **kwargs: root_mean_squared_error(*args, **kwargs) / interquartile_range
        eval_params1 = {}

    # Running count of fits in this process, used only for the progress line.
    global global_counter
    try:
        global_counter += 1
    except NameError:
        global_counter = 1

    # Train
    start_time = time()
    print(f"FIT: {global_counter}, {trial.task.origin}, {trial.task.name}, {trial.method.name}, ", end="")
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore")
        est.fit(X_train, y_train)
    end_time = time()
    elapsed_time = end_time - start_time
    trial.log("fit_time", elapsed_time)

    # Predict
    start_time = time()
    predictions = predict_fn(X_test)
    end_time = time()
    elapsed_time = end_time - start_time
    trial.log("predict_time", elapsed_time)

    # Score
    eval_score = eval_fn1(y_test, predictions, **eval_params1)
    trial.log(eval_name1, eval_score)

    if eval_fn2 is not None:
        eval_score2 = eval_fn2(y_test, predictions)
        trial.log(eval_name2, eval_score2)

    # Completes the progress line started before fitting.
    print(eval_score)

In [None]:
# Flags forwarded to Store(...) and populate_with_datasets(...) below.
force_recreate = False
exist_ok = True

import uuid

# A random UUID suffix gives every run of this cell its own experiment name.
experiment_name = f"myexperiment__{uuid.uuid4()}"
print(f"Experiment name: {experiment_name}")

import os
from itertools import chain
from powerlift.bench import Benchmark, Store, populate_with_datasets
from powerlift.bench import retrieve_openml, retrieve_pmlb, retrieve_catboost_50k
from powerlift.executors import LocalMachine

# Initialize database (if needed): a SQLite file in the current working directory.
store = Store(f"sqlite:///{os.getcwd()}/powerlift.db", force_recreate=force_recreate)

# NOTE(review): whether "~" is expanded is up to powerlift -- confirm.
cache_dir = "~/.powerlift"

# Only the OpenML source is enabled; uncomment a line to add another source.
data_retrieval = chain(
    # retrieve_catboost_50k(cache_dir=cache_dir),
    # retrieve_pmlb(cache_dir=cache_dir),
    retrieve_openml(cache_dir=cache_dir),
)

# This downloads datasets once and feeds into the database.
populate_with_datasets(store, data_retrieval, exist_ok=exist_ok)

# Run experiment: trial_filter picks the methods per task, trial_runner executes each trial.
benchmark = Benchmark(store, name=experiment_name)
benchmark.run(trial_runner, trial_filter, executor=LocalMachine(store, debug_mode=True))

In [None]:
# Wait for the benchmark run started above to finish before looking at its
# status and results (as the method name indicates; exact semantics are powerlift's).
benchmark.wait_until_complete()

In [None]:
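# Re-establish the connection (e.g. after a kernel restart): define conn_str
# and experiment_name first, then uncomment the line below.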
#benchmark = Benchmark(conn_str, name=experiment_name)

In [None]:
status_df = benchmark.status()

# Print every recorded error message first ...
for message in (entry for entry in status_df["errmsg"] if entry is not None):
    print("ERROR: " + str(message))

# ... then one line per status value with its number of occurrences.
status_counts = status_df['status'].value_counts()
print(status_counts.to_string(index=True, header=False))

In [None]:
# Fetch the benchmark results and save them to results.csv so they can be
# analysed later without re-running the benchmark.
results_df = benchmark.results()
# index=False is the documented boolean way to leave the row index out of the file.
results_df.to_csv('results.csv', index=False)

In [None]:
# reload if analyzing later
# Reads results.csv back into results_df, replacing any results_df already in
# memory, so the summary below can be run without re-running the benchmark.
import pandas as pd
results_df = pd.read_csv('results.csv')

In [None]:
import pandas as pd

# Mean of every logged measure per method: one row per method, one column per measure name.
averages = (
    results_df
    .groupby(['method', 'name'])['num_val']
    .mean()
    .unstack()
    .reset_index()
)

# Rank the methods against each other within every (task, measure) pair
# (rank 1 = smallest value), then average those ranks per measure.
# NOTE(review): a method with no value for a task is left unranked, which can
# favour the remaining methods on that task -- confirm this is acceptable.
metric_ranks = (
    results_df
    .pivot_table(values='num_val', index=['task', 'name'], columns='method')
    .rank(axis=1, ascending=True, method='min')
    .groupby('name')
    .mean()
    .transpose()
)
metric_ranks.columns = [f"{col}_RANK" for col in metric_ranks.columns]
metric_ranks = metric_ranks.reset_index()

# Overall rank: same ranking, restricted to the three measures below
# (smaller is better for each) and averaged over all tasks.
overall_rank = (
    results_df[results_df['name'].isin(['log_loss', 'cross_entropy', 'rmsdiqr'])]
    .pivot_table(values='num_val', index='task', columns='method')
    .rank(axis=1, ascending=True, method='min')
    .mean()
    .to_frame(name='RANK')
    .reset_index()
)

# Columns of the final report, in display order; absent ones show up as NaN.
desired_columns = ['method', 'RANK', 'auc', 'multi_auc', 'rmsdiqr', 'log_loss_RANK', 'cross_entropy_RANK', 'rmsdiqr_RANK', 'fit_time', 'predict_time']
combined_df = (
    averages
    .merge(metric_ranks, on='method')
    .merge(overall_rank, on='method')
    .reindex(columns=desired_columns)
    .sort_values(by='RANK')
)

print(combined_df.to_string(index=False))