In [1]:
from BlackBox.lmart.LambdaMart_utils import LMARTGridsearch
import pandas as pd
import numpy as np
%reload_ext autoreload
%autoreload 2

In [2]:
lMart_parameter = dict(
    boosting_type=["dart", "gbdt"],
    num_leaves=[2, 5, 10, 20, 30, 40],
    max_depth=[-1],
    n_estimators=[75, 80, 100, 150, 200],
    learning_rate=[0.02, 0.05, 0.08, 0.1, 0.15, 0.2],
    reg_lambda=[0.00005, 0.0001, 0.0002, 0.0003]
)
gridsearch_parameters = dict( # unbalanced sources
    train="../../outputs/scores/scores_tr.csv",
    valid="../../outputs/scores/scores_vl.csv",
    test="../../outputs/scores/scores_ts.csv",
    nDCG_at=15
)
gridsearch_parameters2 = dict( # balanced sources
    train="../../outputs/bal_scores/scores_tr.csv",
    valid="../../outputs/bal_scores/scores_vl.csv",
    test="../../outputs/bal_scores/scores_ts.csv",
    nDCG_at=15
)

# Grid-search

In [3]:
gs = LMARTGridsearch(**gridsearch_parameters)

In [4]:
best_ = gs.grid_search(lMart_parameter)
gs.save_model(best_[0],"LGBMRanker_unbalanced")

100%|██████████| 1440/1440 [05:12<00:00,  4.61it/s, nDCG_15=0.847]


# Metric evaluation

In [5]:
best_model = gs.load_model("./saved_models/LGBMRanker_unbalanced")

In [6]:
# nDCG
nDCG_train = gs.eval_model(model=best_model, df=gs.train, qIds=gs.qIds_train, nDCG_at=[1,10,15])
nDCG_valid = gs.eval_model(model=best_model, df=gs.valid, qIds=gs.qIds_val, nDCG_at=[1,10,15])
nDCG_test = gs.eval_model(model=best_model, df=gs.test, qIds=gs.qIds_test, nDCG_at=[1,10,15])

other_evals_tr = gs.other_eval(df=gs.test, qIds=gs.qIds_test, nDCG_at=[1,10,15])

display(pd.DataFrame([nDCG_train,nDCG_valid,nDCG_test],index=["Training","Validation","Test"]))

print("Test set")
display(pd.DataFrame(other_evals_tr).T)

Unnamed: 0,nDCG@1,nDCG@10,nDCG@15
Training,0.9867,0.9245,0.9098
Validation,0.9183,0.8598,0.847
Test,0.8717,0.852,0.8415


Test set


Unnamed: 0,nDCG@1,nDCG@10,nDCG@15
random_permutation,0.4717,0.5759,0.5915
perfect_nDCG,1.0,1.0,1.0
worste_nDCG,0.125,0.3066,0.3341


# Example of Job-offer

In [7]:
gs.test[gs.test["qId"]==15][["kId","labels"]].sort_values("labels",ascending=False).head(15)

Unnamed: 0,kId,labels
171,256,3
4808,117,3
2839,228,3
3583,101,2
4227,87,2
1459,200,2
2043,251,2
1433,249,2
1483,105,2
4905,109,2


In [8]:
features = gs.test[gs.test["qId"]==15]
y_pred = best_model.predict(np.asarray(features.iloc[:,2:13].values))
y_pred = pd.DataFrame(y_pred, index=features.index, columns=["lambdas"])
dt_final = pd.merge(features, y_pred, left_index=True, right_index=True)
dt_final.sort_values("lambdas",ascending=False)[["kId","labels"]].head(15)

Unnamed: 0,kId,labels
171,256,3
2839,228,3
4808,117,3
2712,194,2
902,265,1
2259,281,2
1729,133,1
2307,151,2
3817,206,1
948,77,1


# Grid search - balanced

In [9]:
gs = LMARTGridsearch(**gridsearch_parameters2)

In [10]:
best_ = gs.grid_search(lMart_parameter)
gs.save_model(best_[0],"LGBMRanker_balanced")

100%|██████████| 1440/1440 [05:01<00:00,  4.77it/s, nDCG_15=0.877]


In [11]:
best_model = gs.load_model("./saved_models/LGBMRanker_balanced")

In [12]:
# nDCG
nDCG_train = gs.eval_model(model=best_model, df=gs.train, qIds=gs.qIds_train, nDCG_at=[1,10,15])
nDCG_valid = gs.eval_model(model=best_model, df=gs.valid, qIds=gs.qIds_val, nDCG_at=[1,10,15])
nDCG_test = gs.eval_model(model=best_model, df=gs.test, qIds=gs.qIds_test, nDCG_at=[1,10,15])

other_evals_tr = gs.other_eval(df=gs.test, qIds=gs.qIds_test, nDCG_at=[1,10,15])

display(pd.DataFrame([nDCG_train,nDCG_valid,nDCG_test],index=["Training","Validation","Test"]))

print("Test set")
display(pd.DataFrame(other_evals_tr).T)

Unnamed: 0,nDCG@1,nDCG@10,nDCG@15
Training,0.9933,0.9596,0.9491
Validation,0.96,0.8961,0.8772
Test,0.925,0.9057,0.8909


Test set


Unnamed: 0,nDCG@1,nDCG@10,nDCG@15
random_permutation,0.4815,0.6017,0.622
perfect_nDCG,1.0,1.0,1.0
worste_nDCG,0.1382,0.3228,0.3545


# Example of Job-offer

In [13]:
# gs.test[gs.test["qId"]==85][["kId","labels"]].sort_values("labels",ascending=False).head(15)

Unnamed: 0,kId,labels


In [14]:
# features = gs.test[gs.test["qId"]==85]
# y_pred = best_model.predict(np.asarray(features.iloc[:,2:13].values))
# y_pred = pd.DataFrame(y_pred, index=features.index, columns=["lambdas"])
# dt_final = pd.merge(features, y_pred, left_index=True, right_index=True)
# dt_final.sort_values("lambdas",ascending=False)[["kId","labels"]].head(15)

ValueError: Found array with 0 sample(s) (shape=(0, 11)) while a minimum of 1 is required.