In [1]:
from GlassBox.ebm.ebm_utils import EBMGridSearch
from interpret.glassbox import ExplainableBoostingClassifier

import pandas as pd
%reload_ext autoreload
%autoreload 2

In [2]:
# Hyper-parameter grid handed to the grid search.
# Sizes are 4 x 9 x 6 x 1, i.e. 216 combinations in total.
EBM_parameter = {
    "learning_rate": [0.005, 0.01, 0.02, 0.03],
    "min_samples_leaf": list(range(2, 11)),  # 2 .. 10 inclusive
    "max_leaves": list(range(2, 8)),         # 2 .. 7 inclusive
    "interactions": [0],                     # single value: interactions fixed at 0
}

# EBMGridSearch settings for the *unbalanced* sources: score CSVs for the
# three splits, the task type and the nDCG cutoff.
gridsearch_parameters = {
    "train": "../../outputs/scores/scores_tr.csv",
    "valid": "../../outputs/scores/scores_vl.csv",
    "test": "../../outputs/scores/scores_ts.csv",
    "task": "Classification",
    "nDCG_at": 15,
}

# Same settings for the *balanced* sources; only the input folder differs
# (`bal_scores` instead of `scores`).
gridsearch_parameters2 = {
    "train": "../../outputs/bal_scores/scores_tr.csv",
    "valid": "../../outputs/bal_scores/scores_vl.csv",
    "test": "../../outputs/bal_scores/scores_ts.csv",
    "task": "Classification",
    "nDCG_at": 15,
}

# Grid search - unbalanced

In [3]:
# Build the grid-search helper from the unbalanced-source settings
# (train/valid/test score CSV paths, task="Classification", nDCG_at=15).
gs = EBMGridSearch(**gridsearch_parameters)

In [4]:
# Run the grid search over EBM_parameter with ExplainableBoostingClassifier
# as the model class. In the recorded run this covered 216 configurations in
# about 2 minutes and the progress bar reported nDCG_15_at=0.798.
best_ = gs.grid_search(
    EBMModel=ExplainableBoostingClassifier,
    hyperparameters=EBM_parameter)
# Persist the first element of the search result under "EBMClass_unbalanced".
# NOTE(review): presumably best_[0] is the best fitted model — confirm against
# the return value of EBMGridSearch.grid_search.
gs.save_model(best_[0], name="EBMClass_unbalanced")

100%|██████████| 216/216 [02:15<00:00,  1.60it/s, nDCG_15_at=0.798]


# Metric evaluation

In [5]:
# Load the persisted unbalanced model back through the grid-search helper.
# NOTE(review): the model was saved with name="EBMClass_unbalanced" but is
# loaded from "./saved_models/EBMClass_unbalanced" — presumably save_model
# writes under ./saved_models/; confirm the two resolve to the same file.
best_model = gs.load_model(name="./saved_models/EBMClass_unbalanced")

In [6]:
# nDCG@1, @10 and @15 of `best_model` on the training, validation and test
# splits held by `gs`. Splits are evaluated lazily, in that order, and each
# call receives its own cutoff list.
nDCG_train, nDCG_valid, nDCG_test = (
    gs.eval_model(model=best_model, df=getattr(gs, split_name), nDCG_at=[1, 10, 15])
    for split_name in ("train", "valid", "test")
)

# One table row per split.
display(
    pd.DataFrame(
        [nDCG_train, nDCG_valid, nDCG_test],
        index=["Training", "Validation", "Test"],
    )
)

Unnamed: 0,nDCG@1,nDCG@10,nDCG@15
Training,0.8585,0.8514,0.8414
Validation,0.8744,0.8203,0.798
Test,0.8462,0.8211,0.8069


# Grid search - balanced

In [7]:
# Rebind `gs` to a helper built from the balanced-source settings
# (gridsearch_parameters2). Every later use of `gs` now refers to the balanced
# data; the unbalanced helper is no longer reachable under this name.
gs = EBMGridSearch(**gridsearch_parameters2)

In [8]:
# Run the same grid search (EBM_parameter, ExplainableBoostingClassifier) on
# the balanced sources. In the recorded run this covered 216 configurations in
# about 1.5 minutes and the progress bar reported nDCG_15_at=0.818.
best_ = gs.grid_search(
    EBMModel=ExplainableBoostingClassifier,
    hyperparameters=EBM_parameter)
# Persist the first element of the search result under "EBMClass_balanced".
# NOTE(review): presumably best_[0] is the best fitted model — confirm against
# the return value of EBMGridSearch.grid_search.
gs.save_model(best_[0], name="EBMClass_balanced")

100%|██████████| 216/216 [01:30<00:00,  2.39it/s, nDCG_15_at=0.818]


In [9]:
# Load the persisted balanced model; this rebinds `best_model`, replacing the
# unbalanced model loaded earlier.
# NOTE(review): the model was saved with name="EBMClass_balanced" but is loaded
# from "./saved_models/EBMClass_balanced" — presumably save_model writes under
# ./saved_models/; confirm the two resolve to the same file.
best_model = gs.load_model(name="./saved_models/EBMClass_balanced")

In [10]:
# nDCG@1, @10 and @15 of `best_model` on the training, validation and test
# splits held by `gs`. Splits are evaluated lazily, in that order, and each
# call receives its own cutoff list.
nDCG_train, nDCG_valid, nDCG_test = (
    gs.eval_model(model=best_model, df=getattr(gs, split_name), nDCG_at=[1, 10, 15])
    for split_name in ("train", "valid", "test")
)

# One table row per split.
display(
    pd.DataFrame(
        [nDCG_train, nDCG_valid, nDCG_test],
        index=["Training", "Validation", "Test"],
    )
)

Unnamed: 0,nDCG@1,nDCG@10,nDCG@15
Training,0.8997,0.8549,0.854
Validation,0.8221,0.8228,0.8179
Test,0.8627,0.837,0.8369
