[breaking] Remove duplicated predict functions.
* Rename all `data` to `X`.
trivialfis committed Jan 12, 2021
1 parent f2f7dd8 commit cf7660a
Showing 2 changed files with 52 additions and 81 deletions.
115 changes: 41 additions & 74 deletions python-package/xgboost/sklearn.py
@@ -687,10 +687,16 @@ def fit(self, X, y, *, sample_weight=None, base_margin=None,
 
         return self
 
-    def predict(self, data, output_margin=False, ntree_limit=None,
-                validate_features=True, base_margin=None):
+    def predict(
+        self,
+        X,
+        output_margin=False,
+        ntree_limit=None,
+        validate_features=True,
+        base_margin=None
+    ):
         """
-        Predict with `data`.
+        Predict with `X`.
 
         .. note:: This function is not thread safe.
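
Because the first parameter is renamed from `data` to `X`, any caller that passed it
by keyword breaks, while positional calls keep working. A minimal migration sketch
(`model` and `X_test` are placeholder names, not from this diff):

    # before: model.predict(data=X_test)   # raises TypeError after this commit
    preds = model.predict(X_test)          # positional call is unaffected
    preds = model.predict(X=X_test)        # or use the new keyword
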
@@ -704,7 +710,7 @@ def predict(self, data, output_margin=False, ntree_limit=None,
         Parameters
         ----------
-        data : array_like
+        X : array_like
             Data to predict with
         output_margin : bool
             Whether to output the raw untransformed margin value.
@@ -723,16 +729,21 @@ def predict(self, data, output_margin=False, ntree_limit=None,
         prediction : numpy array
 
         """
         # pylint: disable=missing-docstring,invalid-name
-        test_dmatrix = DMatrix(data, base_margin=base_margin,
+        test_dmatrix = DMatrix(X, base_margin=base_margin,
                                missing=self.missing, nthread=self.n_jobs)
         # get ntree_limit to use - if none specified, default to
         # best_ntree_limit if defined, otherwise 0.
         if ntree_limit is None:
-            ntree_limit = getattr(self, "best_ntree_limit", 0)
-        return self.get_booster().predict(test_dmatrix,
-                                          output_margin=output_margin,
-                                          ntree_limit=ntree_limit,
-                                          validate_features=validate_features)
+            try:
+                ntree_limit = self.best_ntree_limit
+            except AttributeError:
+                ntree_limit = 0
+        return self.get_booster().predict(
+            test_dmatrix,
+            output_margin=output_margin,
+            ntree_limit=ntree_limit,
+            validate_features=validate_features
+        )
 
     def apply(self, X, ntree_limit=0):
         """Return the predicted leaf every tree for each sample.
@@ -1048,50 +1059,21 @@ def fit(self, X, y, *, sample_weight=None, base_margin=None,
             'Fit gradient boosting model',
             'Fit gradient boosting classifier', 1)
 
-    def predict(self, data, output_margin=False, ntree_limit=None,
-                validate_features=True, base_margin=None):
-        """
-        Predict with `data`.
-
-        .. note:: This function is not thread safe.
-
-          For each booster object, predict can only be called from one thread.
-          If you want to run prediction using multiple thread, call
-          ``xgb.copy()`` to make copies of model object and then call
-          ``predict()``.
-
-          .. code-block:: python
-
-            preds = bst.predict(dtest, ntree_limit=num_round)
-
-        Parameters
-        ----------
-        data : array_like
-            Feature matrix.
-        output_margin : bool
-            Whether to output the raw untransformed margin value.
-        ntree_limit : int
-            Limit number of trees in the prediction; defaults to
-            best_ntree_limit if defined (i.e. it has been trained with early
-            stopping), otherwise 0 (use all trees).
-        validate_features : bool
-            When this is True, validate that the Booster's and data's
-            feature_names are identical. Otherwise, it is assumed that the
-            feature_names are the same.
-
-        Returns
-        -------
-        prediction : numpy array
-        """
-        test_dmatrix = DMatrix(data, base_margin=base_margin,
-                               missing=self.missing, nthread=self.n_jobs)
-        if ntree_limit is None:
-            ntree_limit = getattr(self, "best_ntree_limit", 0)
-        class_probs = self.get_booster().predict(
-            test_dmatrix,
+    def predict(
+        self,
+        X,
+        output_margin=False,
+        ntree_limit=None,
+        validate_features=True,
+        base_margin=None
+    ):
+        class_probs = super().predict(
+            X=X,
             output_margin=output_margin,
             ntree_limit=ntree_limit,
-            validate_features=validate_features)
+            validate_features=validate_features,
+            base_margin=base_margin
+        )
         if output_margin:
             # If output_margin is active, simply return the scores
             return class_probs
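
With the duplicate body gone, the DMatrix construction and the ntree_limit handling
live in exactly one place, XGBModel.predict, and the classifier only layers class
decoding on top. A simplified sketch of that delegation pattern (toy classes, not
the real ones):

    import numpy as np

    class Model:
        def predict(self, X):
            # stand-in for the shared DMatrix construction and booster call
            return np.asarray(X).sum(axis=1)

    class Classifier(Model):
        def predict(self, X):
            scores = super().predict(X)      # reuse the single implementation
            return (scores > 0).astype(int)  # classifier-specific decoding
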
@@ -1136,13 +1118,13 @@ def predict_proba(self, X, ntree_limit=None, validate_features=False,
         a numpy array of shape array-like of shape (n_samples, n_classes) with the
         probability of each data example being of a given class.
         """
-        test_dmatrix = DMatrix(X, base_margin=base_margin,
-                               missing=self.missing, nthread=self.n_jobs)
-        if ntree_limit is None:
-            ntree_limit = getattr(self, "best_ntree_limit", 0)
-        class_probs = self.get_booster().predict(test_dmatrix,
-                                                 ntree_limit=ntree_limit,
-                                                 validate_features=validate_features)
+        class_probs = super().predict(
+            X=X,
+            output_margin=False,
+            ntree_limit=ntree_limit,
+            validate_features=validate_features,
+            base_margin=base_margin
+        )
         return _cls_predict_proba(self.objective, class_probs, np.vstack)
 
     def evals_result(self):
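
Routing predict_proba through the same parent method removes the second hand-rolled
DMatrix construction; callers see no change. A short usage sketch (assumes a fitted
XGBClassifier named `clf` and a feature matrix `X_test`):

    proba = clf.predict_proba(X_test)   # shape (n_samples, n_classes)
    labels = clf.predict(X_test)        # class ids derived from the same scores
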
@@ -1510,18 +1492,3 @@ def fit(
         self.evals_result = evals_result
 
         return self
-
-    def predict(self, data, output_margin=False,
-                ntree_limit=0, validate_features=True, base_margin=None):
-
-        test_dmatrix = DMatrix(data, base_margin=base_margin,
-                               missing=self.missing)
-        if ntree_limit is None:
-            ntree_limit = getattr(self, "best_ntree_limit", 0)
-
-        return self.get_booster().predict(test_dmatrix,
-                                          output_margin=output_margin,
-                                          ntree_limit=ntree_limit,
-                                          validate_features=validate_features)
-
-    predict.__doc__ = XGBModel.predict.__doc__
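
This override belonged to XGBRanker, which now simply inherits XGBModel.predict.
Two incidental fixes are visible in the removed code: ntree_limit defaulted to 0, so
the `if ntree_limit is None` branch could never fire, and the DMatrix was built
without nthread=self.n_jobs; the inherited method handles both. Callers are
unaffected apart from the data-to-X rename (assumes a fitted XGBRanker named
`ranker`):

    scores = ranker.predict(X_test)  # now dispatches to XGBModel.predict
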
18 changes: 11 additions & 7 deletions python-package/xgboost/training.py
@@ -114,7 +114,10 @@ def _train_internal(params, dtrain,
 
         num_groups = int(config['learner']['learner_model_param']['num_class'])
         num_groups = 1 if num_groups == 0 else num_groups
-        bst.best_ntree_limit = ((bst.best_iteration + 1) * num_parallel_tree * num_groups)
+        bst.set_attr(
+            best_ntree_limit=str((bst.best_iteration + 1) * num_parallel_tree * num_groups)
+        )
+        bst.best_ntree_limit = int(bst.attr("best_ntree_limit"))
 
     # Copy to serialise and unserialise booster to reset state and free
     # training memory
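
Booster attributes are stored as strings, which is presumably why best_ntree_limit
is now persisted through set_attr (so it survives serialisation) and converted back
with int() for the Python-side convenience field. A hedged sketch of the round trip
(assumes a trained Booster named `bst`; the value 24 is arbitrary):

    bst.set_attr(best_ntree_limit="24")        # attribute values must be strings
    limit = int(bst.attr("best_ntree_limit"))  # read back and restore the integer
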
@@ -148,15 +151,16 @@ def train(params, dtrain, num_boost_round=10, evals=(), obj=None, feval=None,
         Activates early stopping. Validation metric needs to improve at least once in
         every **early_stopping_rounds** round(s) to continue training.
         Requires at least one item in **evals**.
-        The method returns the model from the last iteration (not the best one).
-        If there's more than one item in **evals**, the last entry will be used
-        for early stopping.
+        The method returns the model from the last iteration (not the best one). Use
+        custom callback or model slicing if the best model is desired.
+        If there's more than one item in **evals**, the last entry will be used for early
+        stopping.
         If there's more than one metric in the **eval_metric** parameter given in
         **params**, the last metric will be used for early stopping.
         If early stopping occurs, the model will have three additional fields:
-        ``bst.best_score``, ``bst.best_iteration`` and ``bst.best_ntree_limit``.
-        (Use ``bst.best_ntree_limit`` to get the correct value if
-        ``num_parallel_tree`` and/or ``num_class`` appears in the parameters)
+        ``bst.best_score``, ``bst.best_iteration`` and ``bst.best_ntree_limit``. (Use
+        ``bst.best_ntree_limit`` to get the correct value if ``num_parallel_tree`` and/or
+        ``num_class`` appears in the parameters)
     evals_result: dict
         This dictionary stores the evaluation results of all the items in watchlist.
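
A short usage sketch of the fields described above (assumes xgboost imported as
`xgb`, a params dict, and DMatrix objects `dtrain`/`dvalid`):

    bst = xgb.train(params, dtrain, num_boost_round=100,
                    evals=[(dvalid, "validation")],
                    early_stopping_rounds=10)
    # cap prediction at the best round found by early stopping
    preds = bst.predict(dvalid, ntree_limit=bst.best_ntree_limit)
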