In [1]:
import pandas as pd
from GlassBox.figs.figs_utils import FIGSGridSearch
from imodels import FIGSClassifier
%reload_ext autoreload
%autoreload 2

In [2]:
# Hyper-parameter grid handed to `gs.grid_search` below. Each key maps to the
# list of candidate values to try: 5 x 9 x 3 = 135 combinations in total.
FIGS_parameter = {
    "max_rules": list(range(5, 30, 5)),       # 5, 10, 15, 20, 25
    "max_trees": list(range(2, 11)),          # 2 .. 10 inclusive
    "min_impurity_decrease": [0.1, 0.2, 0.3],
}

# Grid search - unbalanced

In [3]:
# Grid-search helper bound to the scores dataset; the model selected from it
# is later saved under the name "figsclass_unbalanced".
# NOTE(review): presumably the helper also loads and splits the data, since
# `gs.train`, `gs.valid` and `gs.test` are read further down — confirm in
# GlassBox.figs.figs_utils.
gs = FIGSGridSearch(
    task="Classifier",
    path_dataset="../../outputs/scores.csv",
    random_state=841,  # fixed seed so reruns are comparable
)

In [4]:
# Run the search over every combination in FIGS_parameter using FIGSClassifier.
best_ = gs.grid_search(FIGSClassifier, FIGS_parameter)

# Persist the first element of the search result.
# NOTE(review): presumably element 0 is the best fitted estimator — confirm
# against FIGSGridSearch.grid_search.
best_candidate = best_[0]
gs.save_model(best_candidate, name="figsclass_unbalanced")

100%|██████████| 135/135 [01:57<00:00,  1.15it/s, nDCG=0.731]


# Metric evaluation

In [5]:
# Reload the model that the grid-search cell saved as "figsclass_unbalanced",
# so the evaluation below can run from the stored model.
best_model = gs.load_model(name="figsclass_unbalanced")

In [6]:
# nDCG at cut-offs 1, 10 and 15 for the reloaded model on every split
# (training, validation and test), evaluated in that order.
nDCG_train, nDCG_valid, nDCG_test = (
    gs.eval_model(model=best_model, df=getattr(gs, split), nDCG_at=[1, 10, 15])
    for split in ("train", "valid", "test")
)

# Summary table: one row per split.
display(
    pd.DataFrame(
        [nDCG_train, nDCG_valid, nDCG_test],
        index=["Training", "Validation", "Test"],
    )
)

Unnamed: 0,nDCG@1,nDCG@10,nDCG@15
Training,0.5773,0.6203,0.6482
Validation,0.6077,0.6906,0.7306
Test,0.5936,0.6512,0.6902


# Grid search - balanced

In [7]:
# Grid-search helper bound to the balanced scores dataset. This rebinds `gs`,
# so every later cell works on the balanced data.
# NOTE(review): presumably the helper also loads and splits the data, since
# `gs.train`, `gs.valid` and `gs.test` are read further down — confirm in
# GlassBox.figs.figs_utils.
gs = FIGSGridSearch(
    task="Classifier",
    path_dataset="../../outputs/balanced_scores.csv",
    random_state=841,  # same seed as the unbalanced run
)

In [8]:
# Run the search over every combination in FIGS_parameter using FIGSClassifier,
# this time on the balanced dataset held by `gs`.
best_ = gs.grid_search(FIGSClassifier, FIGS_parameter)

# Persist the first element of the search result.
# NOTE(review): presumably element 0 is the best fitted estimator — confirm
# against FIGSGridSearch.grid_search.
best_candidate = best_[0]
gs.save_model(best_candidate, name="figsclass_balanced")

100%|██████████| 135/135 [00:38<00:00,  3.51it/s, nDCG=0.877]


In [9]:
# Reload the model that the grid-search cell saved as "figsclass_balanced".
# This rebinds `best_model`, replacing the unbalanced model loaded earlier.
best_model = gs.load_model(name="figsclass_balanced")

In [10]:
# nDCG at cut-offs 1, 10 and 15 for the balanced model on every split
# (training, validation and test), evaluated in that order.
nDCG_train, nDCG_valid, nDCG_test = (
    gs.eval_model(model=best_model, df=getattr(gs, split), nDCG_at=[1, 10, 15])
    for split in ("train", "valid", "test")
)

# Summary table: one row per split.
display(
    pd.DataFrame(
        [nDCG_train, nDCG_valid, nDCG_test],
        index=["Training", "Validation", "Test"],
    )
)

Unnamed: 0,nDCG@1,nDCG@10,nDCG@15
Training,0.6226,0.7468,0.8153
Validation,0.6412,0.8583,0.8766
Test,0.6325,0.7941,0.8561


## Example of a job offer

In [11]:
# NOTE(review): this example is disabled — the whole cell is commented out.
# It compares, for a single query id, the top-15 rows ordered by the true
# "labels" with the top-15 ordered by the model's predictions ("lambdas").
# Before re-enabling it:
#   * `np` must be imported; the import cell only brings in pandas,
#     FIGSGridSearch and FIGSClassifier.
#   * `iloc[:, 2:13]` presumably selects the feature columns — TODO confirm
#     against the dataset schema.
# qId = 1
# job_curricula = gs.test[gs.test["qId"] == qId]
# 
# y_pred = best_model.predict(np.asarray(job_curricula.iloc[:, 2:13]))
# 
# y_pred = pd.DataFrame(y_pred, index=job_curricula.index, columns=["lambdas"])
# dt_final = pd.merge(job_curricula, y_pred, left_index=True, right_index=True)
# dt_final.sort_values("labels", ascending=False)["labels"].head(15)
# dt_final.sort_values("lambdas",ascending=False)["labels"].head(15)
# print(best_model)