In [15]:
import lightgbm as lgb
import numpy as np

## Prototype 1: use `lightgbm.train` with a learning-to-rank objective

In [14]:
# Synthetic learning-to-rank data.
# A seeded generator keeps the features/labels identical on every
# Restart & Run All, so downstream scores are comparable between runs.
SEED = 42
rng = np.random.default_rng(SEED)

# `group_*` lists the number of consecutive rows belonging to each query.
# Row counts are derived from the group sizes so the two cannot drift apart.
group_train = [20] * 5
X_train = rng.random(size=(sum(group_train), 3))
# Integer relevance labels drawn from {0, 1, 2} (`high` is exclusive).
y_train = rng.integers(low=0, high=3, size=sum(group_train))

group_test = [10] * 5
X_test = rng.random(size=(sum(group_test), 3))
y_test = rng.integers(low=0, high=3, size=sum(group_test))

In [17]:
# Wrap the arrays in LightGBM Datasets; `group` gives the per-query row counts
# that ranking objectives need.
train_set = lgb.Dataset(X_train, label=y_train, group=group_train)
# Tie the validation set to the training set explicitly so it is built
# against the training data, as LightGBM documents for validation Datasets.
test_set = lgb.Dataset(
    X_test, label=y_test, group=group_test, reference=train_set
)

In [20]:
# Learning-to-rank training configuration.
params = {
    'objective': 'rank_xendcg',
    # Spelled out explicitly; these match the metric names that appear in
    # `gbm.best_score` (ndcg@1 .. ndcg@5).
    'metric': 'ndcg',
    'eval_at': [1, 2, 3, 4, 5],
    'num_iterations': 5,
    # rank_xendcg is a randomized objective; fixing the master seed is meant
    # to make repeated runs reproducible.
    # NOTE(review): confirm `seed` also drives `objective_seed` in the
    # installed LightGBM version.
    'seed': 42,
}

# Train the booster and evaluate on the held-out set after each iteration.
gbm = lgb.train(
    params,
    train_set,
    valid_sets=[test_set],
)

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001476 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 105
[LightGBM] [Info] Number of data points in the train set: 100, number of used features: 3


In [21]:
# Show the validation metrics stored on the booster, keyed by validation-set
# name (e.g. 'valid_0') and then by metric name (e.g. 'ndcg@1').
gbm.best_score

defaultdict(collections.OrderedDict,
            {'valid_0': OrderedDict([('ndcg@1',
                           np.float64(0.3333333333333333)),
                          ('ndcg@2', np.float64(0.33333333333333337)),
                          ('ndcg@3', np.float64(0.48243913572790253)),
                          ('ndcg@4', np.float64(0.563339261997658)),
                          ('ndcg@5', np.float64(0.5649721147346176))])})

## Prototype 2: pass additional parameters via Ray Tune

In [25]:
from ray import tune, train

In [27]:
def objective(x, a, b):
    """Toy score curve: ``a`` times the square root of ``x``, shifted by ``b``.

    Args:
        x: Value whose square root is taken (the loop counter in this notebook).
        a: Multiplier applied to the square root.
        b: Constant offset added to the result.

    Returns:
        ``a * (x ** 0.5) + b``.
    """
    root = x ** 0.5
    return a * root + b

def trainable(config):
    """Fake training loop: report one ``score`` per step to Ray Tune.

    Args:
        config: Mapping with the sampled hyperparameters ``"a"`` and ``"b"``.
    """
    slope = config["a"]
    offset = config["b"]
    # 20 pretend training iterations, each producing an intermediate score.
    for step in range(20):
        train.report({"score": objective(step, slope, offset)})

# Search space: both `a` and `b` are sampled with `tune.uniform(0, 1)`.
space = {
    "a": tune.uniform(0, 1),
    "b": tune.uniform(0, 1),
}

# Launch 10 sampled trials of `trainable` and collect their results.
tuner = tune.Tuner(
    trainable,
    param_space=space,
    tune_config=tune.TuneConfig(num_samples=10),
)
results = tuner.fit()

0,1
Current time:,2024-07-21 16:09:51
Running for:,00:00:07.90
Memory:,6.6/8.0 GiB

Trial name,status,loc,a,b,iter,total time (s),score
trainable_8d0b1_00000,TERMINATED,127.0.0.1:13696,0.00272812,0.801932,20,0.00370598,0.813823
trainable_8d0b1_00001,TERMINATED,127.0.0.1:13697,0.832212,0.430026,20,0.00292897,4.05755
trainable_8d0b1_00002,TERMINATED,127.0.0.1:13698,0.803056,0.569794,20,0.00418186,4.07023
trainable_8d0b1_00003,TERMINATED,127.0.0.1:13699,0.728951,0.437756,20,0.00290537,3.61518
trainable_8d0b1_00004,TERMINATED,127.0.0.1:13700,0.0364744,0.560293,20,0.00318837,0.719282
trainable_8d0b1_00005,TERMINATED,127.0.0.1:13701,0.971483,0.451571,20,0.00352502,4.68617
trainable_8d0b1_00006,TERMINATED,127.0.0.1:13702,0.0730874,0.296075,20,0.00367713,0.614656
trainable_8d0b1_00007,TERMINATED,127.0.0.1:13703,0.164792,0.663394,20,0.00300169,1.38171
trainable_8d0b1_00008,TERMINATED,127.0.0.1:13706,0.550588,0.962744,20,0.00144863,3.3627
trainable_8d0b1_00009,TERMINATED,127.0.0.1:13707,0.747,0.802802,20,0.00141406,4.0589


2024-07-21 16:09:51,234	INFO tune.py:1009 -- Wrote the latest version of all result files and experiment state to '/Users/hongsupshin/ray_results/trainable_2024-07-21_16-09-43' in 0.0130s.
2024-07-21 16:09:51,240	INFO tune.py:1041 -- Total run time: 7.92 seconds (7.89 seconds for the tuning loop).


In [36]:
# Select the best trial, ranking trials by `score` with larger values better.
results.get_best_result(metric='score', mode='max')

Result(
  metrics={'score': 4.686165492417486},
  path='/Users/hongsupshin/ray_results/trainable_2024-07-21_16-09-43/trainable_8d0b1_00005_5_a=0.9715,b=0.4516_2024-07-21_16-09-43',
  filesystem='local',
  checkpoint=None
)

In [37]:
# Tabular view of the run: one row per trial, with the reported score, run
# metadata, and the sampled hyperparameters under `config/*` columns.
results.get_dataframe()

Unnamed: 0,score,timestamp,checkpoint_dir_name,done,training_iteration,trial_id,date,time_this_iter_s,time_total_s,pid,hostname,node_ip,time_since_restore,iterations_since_restore,config/a,config/b,logdir
0,0.813823,1721596188,,False,20,8d0b1_00000,2024-07-21_16-09-48,8.8e-05,0.003706,13696,HSAir.lan,127.0.0.1,0.003706,20,0.002728,0.801932,8d0b1_00000
1,4.057555,1721596188,,False,20,8d0b1_00001,2024-07-21_16-09-48,0.00013,0.002929,13697,HSAir.lan,127.0.0.1,0.002929,20,0.832212,0.430026,8d0b1_00001
2,4.070235,1721596188,,False,20,8d0b1_00002,2024-07-21_16-09-48,0.000109,0.004182,13698,HSAir.lan,127.0.0.1,0.004182,20,0.803056,0.569794,8d0b1_00002
3,3.615182,1721596188,,False,20,8d0b1_00003,2024-07-21_16-09-48,0.000297,0.002905,13699,HSAir.lan,127.0.0.1,0.002905,20,0.728951,0.437756,8d0b1_00003
4,0.719282,1721596188,,False,20,8d0b1_00004,2024-07-21_16-09-48,9.9e-05,0.003188,13700,HSAir.lan,127.0.0.1,0.003188,20,0.036474,0.560293,8d0b1_00004
5,4.686165,1721596188,,False,20,8d0b1_00005,2024-07-21_16-09-48,0.000168,0.003525,13701,HSAir.lan,127.0.0.1,0.003525,20,0.971483,0.451571,8d0b1_00005
6,0.614656,1721596188,,False,20,8d0b1_00006,2024-07-21_16-09-48,0.000141,0.003677,13702,HSAir.lan,127.0.0.1,0.003677,20,0.073087,0.296075,8d0b1_00006
7,1.381706,1721596188,,False,20,8d0b1_00007,2024-07-21_16-09-48,0.000124,0.003002,13703,HSAir.lan,127.0.0.1,0.003002,20,0.164792,0.663394,8d0b1_00007
8,3.362703,1721596191,,False,20,8d0b1_00008,2024-07-21_16-09-51,5.8e-05,0.001449,13706,HSAir.lan,127.0.0.1,0.001449,20,0.550588,0.962744,8d0b1_00008
9,4.0589,1721596191,,False,20,8d0b1_00009,2024-07-21_16-09-51,6e-05,0.001414,13707,HSAir.lan,127.0.0.1,0.001414,20,0.747,0.802802,8d0b1_00009


## Prototype 3: LightGBM + Ray Tune (work in progress)

How this would work for the learning-to-rank (LTR) task at work:
- group: synthetic creation
    - bootstrap sample size 
- benchmark: whatever data I need
- CV: try
- hyperparameter tuning: try

In [None]:
# LightGBM parameters mixed with Ray Tune search-space entries.
# NOTE(review): objective and metrics here are for binary classification,
# while this section is about learning-to-rank — confirm whether a ranking
# objective (as in Prototype 1) was intended.
config = {
    # Fixed LightGBM settings.
    "objective": "binary",
    "metric": ["binary_error", "binary_logloss"],
    "verbose": -1,
    # Tuned settings.
    "boosting_type": tune.grid_search(["gbdt", "dart"]),
    "num_leaves": tune.randint(10, 1000),
    "learning_rate": tune.loguniform(1e-8, 1e-1),
}