In [1]:
import pandas as pd
from GlassBox.figs.figs_utils import FIGSGridSearch
from imodels import FIGSClassifier
%reload_ext autoreload
%autoreload 2

In [2]:
# Hyper-parameter grid for FIGSClassifier: 5 * 9 * 3 = 135 candidate settings.
FIGS_parameter = {
    "max_rules": [5, 10, 15, 20, 25],
    "max_trees": list(range(2, 11)),  # 2..10 inclusive
    "min_impurity_decrease": [0.1, 0.2, 0.3],
}

# FIGSGridSearch keyword arguments -- unbalanced sources.
gridsearch_parameters = {
    "train": "../../outputs/scores/scores_tr.csv",
    "valid": "../../outputs/scores/scores_vl.csv",
    "test": "../../outputs/scores/scores_ts.csv",
    "task": "Classification",
    "nDCG_at": 15,
}

# FIGSGridSearch keyword arguments -- balanced sources.
gridsearch_parameters2 = {
    "train": "../../outputs/bal_scores/scores_tr.csv",
    "valid": "../../outputs/bal_scores/scores_vl.csv",
    "test": "../../outputs/bal_scores/scores_ts.csv",
    "task": "Classification",
    "nDCG_at": 15,
}

# Grid search - unbalanced

In [3]:
# Instantiate the grid-search helper with the unbalanced score files
# (train/valid/test CSV paths, task and nDCG cut-off from gridsearch_parameters).
gs = FIGSGridSearch(**gridsearch_parameters)

In [4]:
# Run the search over FIGS_parameter with FIGSClassifier as the estimator class.
# The 135 iterations shown by the progress bar match the 5 * 9 * 3 value
# combinations in FIGS_parameter.
# NOTE(review): best_[0] is presumably the best-scoring fitted model -- confirm
# against the return value of FIGSGridSearch.grid_search.
best_ = gs.grid_search(FIGSClassifier, FIGS_parameter)
# NOTE(review): saved under the bare name "FIGSClass_unbalanced" but loaded later
# from "./saved_models/FIGSClass_unbalanced" -- confirm save_model writes there.
gs.save_model(best_[0], name="FIGSClass_unbalanced")

100%|██████████| 135/135 [01:18<00:00,  1.72it/s, nDCG=0.603]


# Metric evaluation

In [5]:
# Load the unbalanced model back from its path under ./saved_models; the
# evaluation cell uses this object rather than best_[0].
best_model = gs.load_model(name="./saved_models/FIGSClass_unbalanced")

In [6]:
# nDCG@1, @10 and @15 on all three splits (training, validation, test)
nDCG_train = gs.eval_model(
    model=best_model, df=gs.train, nDCG_at=[1, 10, 15]
)
nDCG_valid = gs.eval_model(
    model=best_model, df=gs.valid, nDCG_at=[1, 10, 15]
)
nDCG_test = gs.eval_model(
    model=best_model, df=gs.test, nDCG_at=[1, 10, 15]
)

# One row per split, one column per cut-off.
display(
    pd.DataFrame(
        data=[nDCG_train, nDCG_valid, nDCG_test],
        index=["Training", "Validation", "Test"],
    )
)

Unnamed: 0,nDCG@1,nDCG@10,nDCG@15
Training,0.4501,0.5443,0.5632
Validation,0.5022,0.5857,0.6031
Test,0.4725,0.573,0.5888


# Grid search - balanced

In [7]:
# Rebind gs to a grid-search helper built on the balanced score files
# (gridsearch_parameters2); from here on gs.train / gs.valid / gs.test come
# from this new instance, not the unbalanced one.
gs = FIGSGridSearch(**gridsearch_parameters2)

In [8]:
# Same search as for the unbalanced data: FIGSClassifier over FIGS_parameter
# (135 iterations in the progress bar). This overwrites the earlier best_.
# NOTE(review): best_[0] is presumably the best-scoring fitted model -- confirm
# against the return value of FIGSGridSearch.grid_search.
best_ = gs.grid_search(FIGSClassifier, FIGS_parameter)
# NOTE(review): saved under the bare name "FIGSClass_balanced" but loaded later
# from "./saved_models/FIGSClass_balanced" -- confirm save_model writes there.
gs.save_model(best_[0], name="FIGSClass_balanced")

100%|██████████| 135/135 [01:02<00:00,  2.16it/s, nDCG=0.666]


In [9]:
# Load the balanced model back from its path under ./saved_models; this
# replaces the unbalanced best_model loaded earlier.
best_model = gs.load_model(name="./saved_models/FIGSClass_balanced")

In [10]:
# nDCG@1, @10 and @15 on all three splits (training, validation, test)
nDCG_train = gs.eval_model(
    model=best_model, df=gs.train, nDCG_at=[1, 10, 15]
)
nDCG_valid = gs.eval_model(
    model=best_model, df=gs.valid, nDCG_at=[1, 10, 15]
)
nDCG_test = gs.eval_model(
    model=best_model, df=gs.test, nDCG_at=[1, 10, 15]
)

# One row per split, one column per cut-off.
display(
    pd.DataFrame(
        data=[nDCG_train, nDCG_valid, nDCG_test],
        index=["Training", "Validation", "Test"],
    )
)

Unnamed: 0,nDCG@1,nDCG@10,nDCG@15
Training,0.5116,0.6035,0.6259
Validation,0.569,0.6507,0.6662
Test,0.5019,0.6151,0.6342
