In [1]:
import sys
import os

sys.path.append(os.path.abspath('..'))

import yaml
import optuna

from src.data.load import load_data
from src.data.prepare import prepare_data
from src.models.cv_iterator import leave_last_k
from src.data.features import feature_engineering
from src.data.utils import build_rank_input
from src.models.tuner import BayesianSearch

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Parse the project configuration (one directory above the notebook) into `config`.
# The safe loader only builds plain Python objects from the YAML document.
with open('../config.yml') as config_stream:
    config = yaml.safe_load(config_stream)
# Drop the closed handle so it does not linger in the notebook namespace.
del config_stream

In [3]:
# Load the inputs described by the `data_loader` config section and hand them
# straight to the preparation step; `dataframes` ends up bound to the prepared result.
dataframes = prepare_data(
    dataframes=load_data(config=config['data_loader']),
)

In [4]:
# Two-stage split, both driven by the `optimization` config section:
#   1) interaction data      -> df_train + df_test
#   2) the resulting df_train -> df_train + df_valid
# NOTE(review): presumably `leave_last_k` holds out the last k interactions per
# user — confirm in src.models.cv_iterator.
df_train, df_test = leave_last_k(
    df=dataframes['data'],
    config=config['optimization'],
)
df_train, df_valid = leave_last_k(
    df=df_train,
    config=config['optimization'],
)

In [5]:
# Build features from the user table, the item table and the *training* split only;
# the validation rows are not passed to feature engineering.
user_item_features = feature_engineering(
    dataframes={
        'user': dataframes['user'],
        'item': dataframes['item'],
        'data': df_train,
    },
)

# Convert both splits into ranker inputs using the same feature set.
# Both calls are evaluated before either name is rebound.
df_train, df_valid = (
    build_rank_input(ratings=df_train, features=user_item_features),
    build_rank_input(ratings=df_valid, features=user_item_features),
)

In [6]:
# perform bayesian search
# The search space is read from the `optimization.hyper_params` config section.
searcher = BayesianSearch(config['optimization']['hyper_params'], algorithm='XGBRanker')

# Seed the sampler so that Restart & Run All proposes the same hyper-parameters
# each time. NOTE(review): any randomness inside `searcher.fit` (e.g. row
# subsampling in the model) is not controlled from here — confirm in src.models.tuner.
SEARCH_SEED = 42
N_TRIALS = 20


def objective(trial) -> float:
    """Score one Optuna trial.

    Delegates to ``searcher.fit`` with the training and validation rank inputs
    and returns its result, which the study maximizes.
    """
    return searcher.fit(df_train, df_valid, trial)


study = optuna.create_study(
    direction='maximize',
    sampler=optuna.samplers.TPESampler(seed=SEARCH_SEED),
)
study.optimize(objective, n_trials=N_TRIALS)

[I 2025-04-21 01:15:12,450] A new study created in memory with name: no-name-8eadb1e6-de14-4d79-9af6-937a614536cf
[I 2025-04-21 01:15:20,164] Trial 0 finished with value: 0.9617892773862179 and parameters: {'learning_rate': 0.24240001362310556, 'gamma': 2.7715715652927466, 'max_depth': 3, 'subsample': 0.5217270640738172, 'n_estimators': 279}. Best is trial 0 with value: 0.9617892773862179.
[I 2025-04-21 01:15:26,762] Trial 1 finished with value: 0.9628529398037687 and parameters: {'learning_rate': 0.2846294055763265, 'gamma': 2.2140847691963907, 'max_depth': 4, 'subsample': 0.9454721667366937, 'n_estimators': 246}. Best is trial 1 with value: 0.9628529398037687.
[I 2025-04-21 01:15:32,968] Trial 2 finished with value: 0.9627550224806303 and parameters: {'learning_rate': 0.22086209588100414, 'gamma': 4.894843972198751, 'max_depth': 5, 'subsample': 0.5211338844377278, 'n_estimators': 217}. Best is trial 1 with value: 0.9628529398037687.
[I 2025-04-21 01:15:35,568] Trial 3 finished with v

In [7]:
# Show the hyper-parameters of the best trial: header line, then the params dict.
print("Best trial:", study.best_trial.params, sep="\n")

Best trial:
{'learning_rate': 0.16982961353358975, 'gamma': 4.294388545896818, 'max_depth': 6, 'subsample': 0.618452753400277, 'n_estimators': 185}
