Skip to content

Commit

Permalink
[dask] Add DaskXGBRanker (#6576)
Browse files Browse the repository at this point in the history
* Initial support for distributed LTR using dask.

* Support `qid` in libxgboost.
* Refactor `predict` and `n_features_in_`, `best_[score/iteration/ntree_limit]`
  to avoid duplicated code.
* Define `DaskXGBRanker`.

The dask ranker doesn't support group structure; instead, it uses query id and
converts it to group ptr internally.
  • Loading branch information
trivialfis committed Jan 8, 2021
1 parent 96d3d32 commit 80065d5
Show file tree
Hide file tree
Showing 18 changed files with 753 additions and 349 deletions.
2 changes: 1 addition & 1 deletion include/xgboost/learner.h
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ class Learner : public Model, public Configurable, public dmlc::Serializable {
* \brief Get the number of features of the booster.
* \return number of features
*/
virtual uint32_t GetNumFeature() = 0;
virtual uint32_t GetNumFeature() const = 0;

/*!
* \brief Set additional attribute to the Booster.
Expand Down
19 changes: 15 additions & 4 deletions python-package/xgboost/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -321,6 +321,7 @@ def next_wrapper(self, this): # pylint: disable=unused-argument

def data_handle(data, label=None, weight=None, base_margin=None,
group=None,
qid=None,
label_lower_bound=None, label_upper_bound=None,
feature_names=None, feature_types=None,
feature_weights=None):
Expand All @@ -333,6 +334,7 @@ def data_handle(data, label=None, weight=None, base_margin=None,
self.proxy.set_info(label=label, weight=weight,
base_margin=base_margin,
group=group,
qid=qid,
label_lower_bound=label_lower_bound,
label_upper_bound=label_upper_bound,
feature_names=feature_names,
Expand Down Expand Up @@ -523,12 +525,14 @@ def __del__(self):
def set_info(self, *,
label=None, weight=None, base_margin=None,
group=None,
qid=None,
label_lower_bound=None,
label_upper_bound=None,
feature_names=None,
feature_types=None,
feature_weights=None):
'''Set meta info for DMatrix.'''
from .data import dispatch_meta_backend
if label is not None:
self.set_label(label)
if weight is not None:
Expand All @@ -537,6 +541,8 @@ def set_info(self, *,
self.set_base_margin(base_margin)
if group is not None:
self.set_group(group)
if qid is not None:
dispatch_meta_backend(matrix=self, data=qid, name='qid')
if label_lower_bound is not None:
self.set_float_info('label_lower_bound', label_lower_bound)
if label_upper_bound is not None:
Expand All @@ -546,7 +552,6 @@ def set_info(self, *,
if feature_types is not None:
self.feature_types = feature_types
if feature_weights is not None:
from .data import dispatch_meta_backend
dispatch_meta_backend(matrix=self, data=feature_weights,
name='feature_weights')

Expand Down Expand Up @@ -993,7 +998,7 @@ def _set_data_from_cuda_columnar(self, data):


Objective = Callable[[np.ndarray, DMatrix], Tuple[np.ndarray, np.ndarray]]
Metric = Callable[[np.ndarray, DMatrix], Tuple[np.ndarray, np.ndarray]]
Metric = Callable[[np.ndarray, DMatrix], Tuple[str, float]]


class Booster(object):
Expand Down Expand Up @@ -1743,10 +1748,16 @@ def num_boosted_rounds(self) -> int:
'''
rounds = ctypes.c_int()
assert self.handle is not None
_check_call(_LIB.XGBoosterBoostedRounds(
self.handle, ctypes.byref(rounds)))
_check_call(_LIB.XGBoosterBoostedRounds(self.handle, ctypes.byref(rounds)))
return rounds.value

def num_features(self) -> int:
    '''Number of features in booster.

    Returns
    -------
    The feature count written by ``XGBoosterGetNumFeature`` for this
    booster's handle.
    '''
    # The C API writes its result through a ``bst_ulong *`` (64-bit unsigned).
    # A 4-byte ``c_int`` is too small a buffer for that write, so use a
    # 64-bit unsigned out-parameter instead.
    features = ctypes.c_uint64()
    assert self.handle is not None
    _check_call(_LIB.XGBoosterGetNumFeature(self.handle, ctypes.byref(features)))
    return features.value

def dump_model(self, fout, fmap='', with_stats=False, dump_format="text"):
"""Dump model into a text or JSON file. Unlike `save_model`, the
output format is primarily used for visualization or interpretation,
Expand Down

0 comments on commit 80065d5

Please sign in to comment.