In [1]:
!pip install xgboost lightgbm catboost scikit-learn numpy

Collecting catboost
  Downloading catboost-1.2.7-cp310-cp310-manylinux2014_x86_64.whl.metadata (1.2 kB)
Downloading catboost-1.2.7-cp310-cp310-manylinux2014_x86_64.whl (98.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m98.7/98.7 MB[0m [31m6.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: catboost
Successfully installed catboost-1.2.7


In [5]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import ndcg_score
from xgboost import XGBRanker
from lightgbm import LGBMRanker
from catboost import CatBoostRanker

# Generate synthetic ranking dataset
np.random.seed(42)
n_samples = 1000
n_features = 5
n_groups = 10  # Number of groups for ranking

X = np.random.rand(n_samples, n_features)
y = np.random.randint(0, 5, size=n_samples)  # Relevance scores (0-4)
groups = np.array_split(np.arange(n_samples), n_groups)  # Group data for ranking

# Group lengths
group_lengths = [len(g) for g in groups]

# Split data into train and test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
group_train = group_lengths[: int(0.8 * len(group_lengths))]
group_test = group_lengths[int(0.8 * len(group_lengths)):]

# Initialize models
models = {
    "XGBoost": XGBRanker(objective="rank:pairwise", eval_metric="ndcg"),
    "LightGBM": LGBMRanker(),
    "CatBoost": CatBoostRanker(loss_function="YetiRankPairwise", verbose=0)
}

# Train and evaluate each model
for name, model in models.items():
    print(f"Training {name}...")

    # Fit the model
    if name == "CatBoost":
        # CatBoost requires additional group_id for ranking
        group_id = np.repeat(np.arange(len(group_train)), group_train)
        model.fit(X_train, y_train, group_id=group_id)
    else:
        model.fit(X_train, y_train, group=group_train)

    # Predict and calculate NDCG
    y_pred = model.predict(X_test)
    ndcg = ndcg_score([y_test], [y_pred])

    print(f"{name} Results:")
    print(f"NDCG Score: {ndcg:.4f}")
    print("-" * 50)


Training XGBoost...
XGBoost Results:
NDCG Score: 0.8528
--------------------------------------------------
Training LightGBM...
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000999 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1275
[LightGBM] [Info] Number of data points in the train set: 800, number of used features: 5
LightGBM Results:
NDCG Score: 0.8924
--------------------------------------------------
Training CatBoost...
CatBoost Results:
NDCG Score: 0.8654
--------------------------------------------------
