In [1]:
from sklearn.datasets import make_classification
import numpy as np


In [2]:
import xgboost as xgb

In [3]:
# Build a synthetic ranking dataset for demonstration.
seed = 1994
n_query_groups = 3
X, y = make_classification(random_state=seed)
rng = np.random.default_rng(seed)
# Draw one query id per sample, uniformly from [0, n_query_groups).
qid = rng.integers(low=0, high=n_query_groups, size=X.shape[0])

In [4]:
# Reorder features, labels and query ids together so that rows sharing a
# query id are contiguous.
sorted_idx = np.argsort(qid)
X, y, qid = X[sorted_idx], y[sorted_idx], qid[sorted_idx]

In [6]:
# Ranker trained with the NDCG objective, using the "topk" pair method
# with 8 pairs per sample.
ranker = xgb.XGBRanker(
    objective="rank:ndcg",
    tree_method="hist",
    lambdarank_pair_method="topk",
    lambdarank_num_pair_per_sample=8,
)
# `qid` was already reordered by `sorted_idx` together with `X` and `y`,
# so it is passed as-is without indexing it again.
ranker.fit(X, y, qid=qid)

In [7]:
import pandas as pd


In [8]:
# `X`, `qid` and `y` come from the earlier cells and are already sorted by
# query id. Feature columns are named "0", "1", ...; the query id is stored
# in an extra "qid" column.
df = pd.DataFrame(X, columns=list(map(str, range(X.shape[1])))).assign(qid=qid)

In [9]:
# The query ids travel inside `df` as the "qid" column, so there is no need
# to pass `qid` as a separate argument.
ranker.fit(df, y)

In [10]:
from sklearn.model_selection import StratifiedGroupKFold,cross_val_score

In [11]:
# The ranker works with scikit-learn cross-validation, and likewise with HPO
# utilities such as GridSearchCV.
# NOTE(review): there are at most `n_query_groups` (3) distinct groups, while
# the recorded output shows 5 fold scores, two of them 0 -- confirm whether
# `n_splits` should be lowered to the number of query groups.
kfold = StratifiedGroupKFold(shuffle=False)
cross_val_score(ranker, df, y, cv=kfold, groups=df["qid"])



array([1., 1., 1., 0., 0.])

In [12]:
# Predict a relevance score for every row of `X`.
scores = ranker.predict(X)
# Indices that order the scores from most relevant to least relevant.
# NOTE(review): this orders rows from all query groups together -- confirm
# that a per-query ordering is not what is wanted here.
sorted_idx = np.flip(np.argsort(scores))
scores = scores[sorted_idx]