<a href="https://colab.research.google.com/github/idio4/task_vk/blob/main/VK.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install tensorflow_ranking
!pip install optuna



In [2]:
import pandas as pd
import numpy as np
import tensorflow_ranking as tfr
import xgboost as xgb
from xgboost import plot_importance
import time
import optuna

In [3]:
from google.colab import files
uploaded = files.upload()

Saving test_df.csv to test_df (1).csv
Saving train_df.csv to train_df (1).csv


In [4]:
df = pd.read_csv("train_df.csv")
df_test = pd.read_csv("test_df.csv")

In [5]:
X_train = df.drop('target', axis=1)
y_train = df['target']
X_test = df_test.drop('target', axis=1)
y_test = df_test['target']

In [6]:
def objective(trial):
    params = {
        'objective': 'rank:ndcg',
        'eta': trial.suggest_loguniform('eta', 0.01, 0.2),
        'gamma': trial.suggest_loguniform('gamma', 1e-8, 1.0),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 10),
        'max_depth': trial.suggest_int('max_depth', 1, 9),
        'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
        'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0),
        'lambda': trial.suggest_loguniform('lambda', 1e-8, 1.0),
        'alpha': trial.suggest_loguniform('alpha', 1e-8, 1.0),
    }


    dtrain = xgb.DMatrix(X_train, label=y_train)
    bst = xgb.train(params, dtrain, num_boost_round=100)

    dval = xgb.DMatrix(X_test)
    preds = bst.predict(dval)

    y_pred = preds.reshape(1, -1)
    y_true = y_test.values.reshape(1, -1)
    ndcg = tfr.keras.metrics.NDCGMetric()
    score = ndcg(y_true, y_pred).numpy()

    return score


In [7]:
#https://randomrealizations.com/posts/xgboost-parameter-tuning-with-optuna/
sampler = optuna.samplers.TPESampler(seed=42)
study = optuna.create_study(direction='maximize', sampler=sampler)
tic = time.time()
while time.time() - tic < 300:
    study.optimize(objective, n_trials=1)

[I 2024-03-12 14:34:19,477] A new study created in memory with name: no-name-23310603-6622-4d16-9e7e-d374bfe4f52b
  'eta': trial.suggest_loguniform('eta', 0.01, 0.2),
  'gamma': trial.suggest_loguniform('gamma', 1e-8, 1.0),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0),
  'lambda': trial.suggest_loguniform('lambda', 1e-8, 1.0),
  'alpha': trial.suggest_loguniform('alpha', 1e-8, 1.0),
[I 2024-03-12 14:34:21,735] Trial 0 finished with value: 0.42696598172187805 and parameters: {'eta': 0.030710573677773714, 'gamma': 0.40338008326003827, 'min_child_weight': 8, 'max_depth': 6, 'subsample': 0.5780093202212182, 'colsample_bytree': 0.5779972601681014, 'lambda': 2.9152036385288193e-08, 'alpha': 0.08499808989182997}. Best is trial 0 with value: 0.42696598172187805.
[I 2024-03-12 14:34:23,565] Trial 1 finished with value: 0.5126597285270691 and parameters: {'eta': 0.06054365855469246, 'gamma': 0.0046193473743

In [9]:
best_params = study.best_params
dtrain = xgb.DMatrix(X_train, label=y_train)
bst = xgb.train(best_params, dtrain, num_boost_round=100)

In [10]:
dval = xgb.DMatrix(X_test)
preds = bst.predict(dval)
y_pred = preds.reshape(1,-1)
y_true = y_test.values.reshape(1,-1)
ndcg = tfr.keras.metrics.NDCGMetric()
ndcg(y_true, y_pred).numpy()

0.586037