In [19]:
import pandas as pd
import numpy as np
from GlassBox.figs.figs_utils import FIGSGridSearch
from imodels import FIGSRegressor
%reload_ext autoreload
%autoreload 2

In [3]:
# Hyper-parameter grid handed to the FIGS grid search below.
# 5 x 9 x 3 values = 135 combinations in total.
FIGS_parameter = {
    "max_rules": list(range(5, 30, 5)),          # 5, 10, 15, 20, 25
    "max_trees": list(range(2, 11)),             # 2 .. 10 inclusive
    "min_impurity_decrease": [0.1, 0.2, 0.3],
}

# Grid search - unbalanced

In [4]:
# Grid-search helper for the unbalanced dataset (scores.csv).
# The resulting object is what later cells read the train/valid/test frames
# from (gs.train, gs.valid, gs.test) and use to search, save, load and
# evaluate models.
# NOTE(review): random_state presumably seeds the data split and/or the
# estimators -- confirm in GlassBox.figs.figs_utils.
gs = FIGSGridSearch(
    path_dataset="../../outputs/scores.csv",
    task="Regressor",
    random_state=841)

In [4]:
# Search the FIGS_parameter grid with FIGSRegressor, then persist the first
# element of the result under the name "figsreg_unbalanced".
# NOTE(review): best_[0] is presumably the best fitted model -- confirm the
# return shape of grid_search in figs_utils.
best_ = gs.grid_search(FIGSRegressor, FIGS_parameter)
gs.save_model(best_[0], name="figsreg_unbalanced")

100%|██████████| 135/135 [01:55<00:00,  1.17it/s, nDCG=0.831]


# Metric evaluation

In [5]:
# Reload the model saved above by name, so the evaluation cells work from the
# persisted artifact rather than from the in-memory best_[0].
best_model = gs.load_model(name="figsreg_unbalanced")

In [6]:
# nDCG@1 / @10 / @15 of the reloaded model on every split (training,
# validation and test), evaluated in that order and shown as one table.
nDCG_train, nDCG_valid, nDCG_test = (
    gs.eval_model(model=best_model, df=split, nDCG_at=[1, 10, 15])
    for split in (gs.train, gs.valid, gs.test)
)

display(
    pd.DataFrame(
        [nDCG_train, nDCG_valid, nDCG_test],
        index=["Training", "Validation", "Test"],
    )
)

Unnamed: 0,nDCG@1,nDCG@10,nDCG@15
Training,0.8338,0.7926,0.8003
Validation,0.7842,0.8044,0.8307
Test,0.7883,0.7797,0.8059


# Example of Job-offer

In [14]:
# Reference ranking for qId == 15 in the test split: the 15 rows with the
# highest `labels` values, showing only kId and labels.
(
    gs.test.loc[gs.test["qId"] == 15, ["kId", "labels"]]
    .sort_values("labels", ascending=False)
    .head(15)
)

Unnamed: 0,kId,labels
3165,165,4
3091,91,3
3103,103,3
3139,139,3
3199,199,3
3118,118,3
3179,179,2
3106,106,2
3027,27,2
3056,56,2


In [30]:
# Model ranking for qId == 15: score every row of that query, attach the
# scores as a "lambdas" column, and list the 15 best-scored rows.
features = gs.test.loc[gs.test["qId"] == 15]

# Columns 2..12 (positional) are fed to the model.
# NOTE(review): assumes these positions are exactly the feature columns the
# model was trained on -- confirm against the dataset layout.
y_pred = pd.DataFrame(
    best_model.predict(features.iloc[:, 2:13].values),
    index=features.index,
    columns=["lambdas"],
)

# Index-on-index join keeps each prediction aligned with its source row.
dt_final = features.merge(y_pred, left_index=True, right_index=True)
dt_final.sort_values("lambdas", ascending=False)[["kId", "labels"]].head(15)

Unnamed: 0,kId,labels
3165,165,4
3187,187,2
3051,51,2
3167,167,2
3103,103,3
3156,156,1
3172,172,2
3047,47,2
3068,68,2
3126,126,2


In [None]:
# NOTE(review): disabled scratch cell. It repeats the per-query inspection done
# for qId == 15 in the cell above, but for qId = 1, and also prints the model.
# Either delete it or turn the shared steps into a helper that takes qId.
# qId = 1
# job_curricula = gs.test[gs.test["qId"] == qId]
# 
# y_pred = best_model.predict(np.asarray(job_curricula.iloc[:, 2:13]))
# 
# y_pred = pd.DataFrame(y_pred, index=job_curricula.index, columns=["lambdas"])
# dt_final = pd.merge(job_curricula, y_pred, left_index=True, right_index=True)
# dt_final.sort_values("labels", ascending=False)["labels"].head(15)
# dt_final.sort_values("lambdas",ascending=False)["labels"].head(15)
# print(best_model)

# Grid search - balanced

In [7]:
# Grid-search helper for the balanced dataset (balanced_scores.csv), with the
# same task and random_state as the unbalanced run.
# This rebinds `gs`: from here on gs.train / gs.valid / gs.test refer to the
# balanced data, and the unbalanced helper is no longer reachable via `gs`.
gs = FIGSGridSearch(
    path_dataset="../../outputs/balanced_scores.csv",
    task="Regressor",
    random_state=841)

In [8]:
# Same FIGS_parameter grid as the unbalanced run, now on the balanced data.
# The first element of the result is saved under "figsreg_balanced".
# NOTE(review): best_[0] is presumably the best fitted model -- confirm the
# return shape of grid_search in figs_utils.
best_ = gs.grid_search(FIGSRegressor, FIGS_parameter)
gs.save_model(best_[0], name="figsreg_balanced")

100%|██████████| 135/135 [00:38<00:00,  3.49it/s, nDCG=0.931]


In [9]:
# Reload the balanced model by name. This rebinds `best_model`, which until
# now referred to the model loaded as "figsreg_unbalanced".
best_model = gs.load_model(name="figsreg_balanced")

In [10]:
# nDCG@1 / @10 / @15 of the reloaded model on every split (training,
# validation and test), evaluated in that order and shown as one table.
nDCG_train, nDCG_valid, nDCG_test = (
    gs.eval_model(model=best_model, df=split, nDCG_at=[1, 10, 15])
    for split in (gs.train, gs.valid, gs.test)
)

display(
    pd.DataFrame(
        [nDCG_train, nDCG_valid, nDCG_test],
        index=["Training", "Validation", "Test"],
    )
)

Unnamed: 0,nDCG@1,nDCG@10,nDCG@15
Training,0.8744,0.8694,0.9037
Validation,0.8475,0.9183,0.9306
Test,0.829,0.872,0.9152
