In [None]:
import pandas as pd
import numpy as np

from tqdm import tqdm

from collections import Counter

import gc
import cloudpickle

import os, sys
import random

import warnings

import xgboost as xgb

# Show which XGBoost build is in use; the ANSI codes colour the text blue
# (34) and then reset the terminal colour (0).
version_banner = ('Using \033[34mXGBoost', xgb.__version__, '\033[0m')
print(*version_banner)

## Main Part

In [None]:
# Read both splits in one statement: train.parquet becomes X_train and
# valid.parquet becomes X_test.  At this point each frame still carries the
# label and id columns that later cells remove.
X_train, X_test = (
    pd.read_parquet(parquet_path)
    for parquet_path in (
        '../input/data-for-final-model/train.parquet',
        '../input/data-for-final-model/valid.parquet',
    )
)

In [None]:
def _ordered_with_group_sizes(frame, key='image_id'):
    """Return ``frame`` ordered by ``key`` plus the per-``key`` row counts.

    XGBoost interprets a ``group`` list as the sizes of *consecutive* row
    blocks, while ``groupby(key).size()`` reports counts in sorted-key order.
    The two only agree when the rows themselves are ordered by ``key``, so the
    frame is stable-sorted when it is not already ordered (a no-op for data
    that is already sorted).

    Raises
    ------
    ValueError
        If the group sizes do not add up to the number of rows (for example
        because ``key`` contains missing values, which ``groupby`` drops).
    """
    if not frame[key].is_monotonic_increasing:
        # Stable sort keeps the original relative order inside each group.
        frame = frame.sort_values(key, kind='stable')

    sizes = frame.groupby(key).size().tolist()

    if sum(sizes) != len(frame):
        raise ValueError(
            f'group sizes for {key!r} cover {sum(sizes)} rows, '
            f'but the frame has {len(frame)} rows'
        )
    return frame, sizes


# Rebind the frames so that the row order is guaranteed to match the group
# sizes; labels are popped from these frames later, so they stay aligned.
X_train, group_train = _ordered_with_group_sizes(X_train)
X_test, group_test = _ordered_with_group_sizes(X_test)

In [None]:
# Detach the 'MATCH' label column from each frame (pop removes it in place
# and returns it as a Series).
y_test = X_test.pop('MATCH')
y_train = X_train.pop('MATCH')

# Remove the id columns in place so that only model features remain.
# 'image_id' was already used to derive the ranking group sizes;
# 'target_id' is presumably an identifier as well -- TODO confirm.
X_test.drop(['image_id', 'target_id'], axis='columns', inplace=True)
X_train.drop(['image_id', 'target_id'], axis='columns', inplace=True)

In [None]:
# Stack the train and validation splits (train first) into one data set:
# features, labels and group sizes are all concatenated in the same order so
# they stay aligned with each other.
X = pd.concat((X_train, X_test))
y = np.concatenate((y_train, y_test))
group_final = [*group_train, *group_test]

In [None]:
# Sanity check: shapes of the combined, train and validation label arrays.
tuple(labels.shape for labels in (y, y_train, y_test))

In [None]:
def fit_save(X, y, group_final, params, model_filename):
    """Fit an XGBoost ranker, save it to disk and print its final NDCG scores.

    Parameters
    ----------
    X, y
        Training features and labels, passed straight to ``XGBRanker.fit``.
    group_final
        Query-group sizes for ``X``, passed as ``group``.
    params : dict
        Keyword arguments for ``xgb.sklearn.XGBRanker``.  It must not contain
        ``tree_method`` or ``gpu_id`` (both are set here).  Any
        ``eval_metric`` entry is replaced by ``'ndcg'`` because the scores
        printed below are read from the ``'ndcg'`` evaluation history.
    model_filename
        Destination handed to ``ranker.save_model``.

    Returns
    -------
    None
        The scores are printed (validation score first, in blue, then the
        train score); nothing is returned.

    Notes
    -----
    The evaluation sets are NOT arguments: they come from the notebook-level
    globals ``X_train``, ``y_train``, ``group_train``, ``X_test``, ``y_test``
    and ``group_test``, which must exist before this function is called.
    When ``X`` already contains those rows (as in the final fit of this
    notebook), neither printed score is a held-out estimate.
    """
    # Configure the metric on the estimator instead of passing it to fit():
    # the fit-time ``eval_metric`` argument is deprecated since XGBoost 1.6
    # and rejected by newer releases, and it duplicated the value already
    # supplied through ``params``.
    ranker_params = {**params, 'eval_metric': 'ndcg'}

    # NOTE(review): XGBoost 2.x deprecates tree_method='gpu_hist' / gpu_id in
    # favour of tree_method='hist', device='cuda'.  Left unchanged to stay
    # compatible with older versions -- confirm the target version.
    ranker = xgb.sklearn.XGBRanker(**ranker_params, tree_method='gpu_hist', gpu_id=0)

    ranker.fit(
        X,
        y,
        group=group_final,
        eval_set=[(X_train, y_train), (X_test, y_test)],
        eval_group=[group_train, group_test],
    )

    ranker.save_model(model_filename)

    # evals_result() is keyed by the position of each pair in ``eval_set``:
    # validation_0 -> (X_train, y_train), validation_1 -> (X_test, y_test).
    result = ranker.evals_result()
    train_score = result['validation_0']['ndcg'][-1]
    test_score = result['validation_1']['ndcg'][-1]

    print('\033[34m', test_score, '\033[0m', train_score)

In [None]:
# Hyper-parameters for the final ranker, forwarded to XGBRanker by fit_save.
params = dict(
    # Learning task and the metric reported on the evaluation sets.
    objective='rank:ndcg',
    eval_metric='ndcg',
    # Ensemble size and seed.
    n_estimators=450,
    random_state=1187,
    # Column subsampling ratios (1 = use every column).
    colsample_bytree=1,
    colsample_bylevel=1,
    colsample_bynode=1,
    # Tree depth and shrinkage.
    max_depth=2,
    learning_rate=0.05,
    # Input handling flags.
    enable_categorical=True,
    use_label_encoder=False,
    # L1 / L2 regularisation on the leaf weights.
    reg_alpha=1.0,
    reg_lambda=10,
)

# Train on the combined train + validation data and write the model file.
fit_save(X, y, group_final, params=params, model_filename='final.model')