Fix learning rate scheduler with cv. #6720

Merged · 2 commits · Feb 28, 2021
7 changes: 5 additions & 2 deletions python-package/xgboost/callback.py
@@ -20,6 +20,8 @@ def _get_callback_context(env):
         context = 'train'
     elif env.model is None and env.cvfolds is not None:
         context = 'cv'
+    else:
+        raise ValueError("Unexpected input with both model and cvfolds.")
     return context


@@ -751,7 +753,7 @@ def before_iteration(self, model, epoch, dtrain, evals):
         '''Called before each iteration.'''
         for cb in self.callbacks_before_iter:
             rank = rabit.get_rank()
-            cb(CallbackEnv(model=model,
+            cb(CallbackEnv(model=None if self.cvfolds is not None else model,
                            cvfolds=self.cvfolds,
                            iteration=epoch,
                            begin_iteration=self.start_iteration,
@@ -764,6 +766,7 @@ def after_iteration(self, model, epoch, dtrain, evals):
         '''Called after each iteration.'''
         evaluation_result_list = []
         if self.cvfolds is not None:
+            # dtrain is not used here.
             scores = model.eval(epoch, self.feval)
             self.aggregated_cv = _aggcv(scores)
             evaluation_result_list = self.aggregated_cv
@@ -782,7 +785,7 @@ def after_iteration(self, model, epoch, dtrain, evals):
         try:
             for cb in self.callbacks_after_iter:
                 rank = rabit.get_rank()
-                cb(CallbackEnv(model=model,
+                cb(CallbackEnv(model=None if self.cvfolds is not None else model,
                                cvfolds=self.cvfolds,
                                iteration=epoch,
                                begin_iteration=self.start_iteration,
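
The two callback.py changes above work as a pair: once `model` is forced to `None` whenever `cvfolds` is present, exactly one of the two fields is populated, so the stricter `_get_callback_context` always resolves to `'train'` or `'cv'`. A minimal stand-alone sketch of that dispatch; the `Env` namedtuple here is a hypothetical stand-in for the real `CallbackEnv`:

    # Sketch only: mirrors the dispatch in _get_callback_context.
    from collections import namedtuple

    Env = namedtuple("Env", ["model", "cvfolds"])

    def get_context(env):
        if env.model is not None and env.cvfolds is None:
            return "train"
        if env.model is None and env.cvfolds is not None:
            return "cv"
        # Before this patch the mixed case fell through and `return context`
        # raised UnboundLocalError; now it fails with a clear message.
        raise ValueError("Unexpected input with both model and cvfolds.")

    assert get_context(Env(model=object(), cvfolds=None)) == "train"
    assert get_context(Env(model=None, cvfolds=[object()])) == "cv"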
28 changes: 21 additions & 7 deletions python-package/xgboost/training.py
@@ -180,7 +180,7 @@ def train(params, dtrain, num_boost_round=10, evals=(), obj=None, feval=None,
 
     .. code-block:: python
 
-        [xgb.callback.reset_learning_rate(custom_rates)]
+        [xgb.callback.LearningRateScheduler(custom_rates)]
 
     Returns
     -------
@@ -207,6 +207,11 @@ def __init__(self, dtrain, dtest, param):
         self.watchlist = [(dtrain, 'train'), (dtest, 'test')]
         self.bst = Booster(param, [dtrain, dtest])
 
+    def __getattr__(self, name):
+        def _inner(*args, **kwargs):
+            return getattr(self.bst, name)(*args, **kwargs)
+        return _inner
+
     def update(self, iteration, fobj):
         """Update the boosters for one iteration"""
         self.bst.update(self.dtrain, iteration, fobj)
@@ -239,15 +244,24 @@ def attr(self, key):
         '''Redirect to booster attr.'''
         return self.cvfolds[0].bst.attr(key)
 
+    def set_param(self, params, value=None):
+        """Iterate through folds for set_param"""
+        for f in self.cvfolds:
+            f.bst.set_param(params, value)
+
+    def num_boosted_rounds(self):
+        '''Number of boosted rounds.'''
+        return self.cvfolds[0].num_boosted_rounds()
+
     @property
     def best_iteration(self):
         '''Get best_iteration'''
-        ret = self.cvfolds[0].bst.attr('best_iteration')
-        return int(ret)
+        return int(self.cvfolds[0].bst.attr("best_iteration"))
 
-    def num_boosted_rounds(self) -> int:
-        '''Number of boosted rounds.'''
-        return self.cvfolds[0].bst.num_boosted_rounds()
+    @property
+    def best_score(self):
+        """Get best_score."""
+        return float(self.cvfolds[0].bst.attr("best_score"))
 
 
 def groups_to_rows(groups, boundaries):
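
This hunk is the core of the fix: `LearningRateScheduler` adjusts the learning rate by calling `set_param` on the model object the callback env exposes, and during cv that object is the fold-pack wrapper, which previously had no `set_param` at all. The new method fans the call out to every fold's booster, while the `__getattr__` added to `CVPack` earlier forwards any other booster method. A minimal stand-alone sketch of the pattern, using dummy classes rather than the real ones:

    # Stand-alone sketch of the delegation pattern with dummy classes.
    class DummyBooster:
        def __init__(self):
            self.params = {}

        def set_param(self, key, value=None):
            self.params[key] = value

    class Fold:
        # Like CVPack after this patch: unknown methods forward to the booster.
        def __init__(self):
            self.bst = DummyBooster()

        def __getattr__(self, name):
            def _inner(*args, **kwargs):
                return getattr(self.bst, name)(*args, **kwargs)
            return _inner

    class PackedFolds:
        # Like the cv wrapper: set_param is applied to every fold's booster.
        def __init__(self, cvfolds):
            self.cvfolds = cvfolds

        def set_param(self, params, value=None):
            for f in self.cvfolds:
                f.bst.set_param(params, value)

    folds = PackedFolds([Fold(), Fold()])
    folds.set_param("learning_rate", 0.05)  # what LearningRateScheduler does each round
    assert all(f.bst.params["learning_rate"] == 0.05 for f in folds.cvfolds)

Note that `__getattr__` is only consulted for names that ordinary lookup misses, so `self.bst` itself resolves normally and the forwarding cannot recurse.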
@@ -419,7 +433,7 @@ def cv(params, dtrain, num_boost_round=10, nfold=3, stratified=False, folds=None
 
     .. code-block:: python
 
-        [xgb.callback.reset_learning_rate(custom_rates)]
+        [xgb.callback.LearningRateScheduler(custom_rates)]
     shuffle : bool
         Shuffle data before creating folds.
 
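
Both docstrings now point at `LearningRateScheduler`, the callback this PR makes usable with `cv`. For reference, usage looks roughly like the sketch below; the synthetic data and the decay schedule are made up for illustration:

    import numpy as np
    import xgboost as xgb

    rng = np.random.RandomState(0)
    dtrain = xgb.DMatrix(rng.rand(200, 4), label=rng.rand(200))

    # Illustrative schedule: halve eta every two rounds.
    def eta_decay(boosting_round):
        return 0.3 * (0.5 ** (boosting_round // 2))

    results = xgb.cv(
        {"objective": "reg:squarederror", "tree_method": "hist"},
        dtrain,
        num_boost_round=8,
        nfold=3,
        callbacks=[xgb.callback.LearningRateScheduler(eta_decay)],
    )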
33 changes: 21 additions & 12 deletions tests/python/test_callback.py
@@ -206,6 +206,7 @@ def test_early_stopping_continuation(self):
             booster.best_iteration + early_stopping_rounds + 1
 
     def run_eta_decay(self, tree_method, deprecated_callback):
+        """Test learning rate scheduler, used by both CPU and GPU tests."""
         if deprecated_callback:
             scheduler = xgb.callback.reset_learning_rate
         else:
@@ -217,7 +218,10 @@ def run_eta_decay(self, tree_method, deprecated_callback):
         watchlist = [(dtest, 'eval'), (dtrain, 'train')]
         num_round = 4
 
-        warning_check = pytest.warns(UserWarning) if deprecated_callback else tm.noop_context()
+        if deprecated_callback:
+            warning_check = pytest.warns(UserWarning)
+        else:
+            warning_check = tm.noop_context()
 
         # learning_rates as a list
         # init eta with 0 to check whether learning_rates work
@@ -288,17 +292,22 @@ def eta_decay(ithround, num_boost_round=num_round):
         for i in range(1, len(eval_errors_0)):
             assert eval_errors_3[i] != eval_errors_2[i]
 
-    def test_eta_decay_hist(self):
-        self.run_eta_decay('hist', True)
-        self.run_eta_decay('hist', False)
-
-    def test_eta_decay_approx(self):
-        self.run_eta_decay('approx', True)
-        self.run_eta_decay('approx', False)
-
-    def test_eta_decay_exact(self):
-        self.run_eta_decay('exact', True)
-        self.run_eta_decay('exact', False)
+        with warning_check:
+            xgb.cv(param, dtrain, num_round, callbacks=[scheduler(eta_decay)])
+
+    @pytest.mark.parametrize(
+        "tree_method, deprecated_callback",
+        [
+            ("hist", True),
+            ("hist", False),
+            ("approx", True),
+            ("approx", False),
+            ("exact", True),
+            ("exact", False),
+        ],
+    )
+    def test_eta_decay(self, tree_method, deprecated_callback):
+        self.run_eta_decay(tree_method, deprecated_callback)
 
     def test_check_point(self):
         from sklearn.datasets import load_breast_cancer
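
One aside on the refactored warning check in `run_eta_decay`: `tm.noop_context` is the test suite's do-nothing context manager, which lets the same `with warning_check:` line wrap both the deprecated and the new code path. Assuming it matches `contextlib.nullcontext` semantics, a sketch of an equivalent helper:

    import contextlib

    # Hypothetical equivalent of tm.noop_context: enters and exits with no effect.
    @contextlib.contextmanager
    def noop_context():
        yield

    # The same statement shape then works in both branches:
    #     with noop_context(): ...
    #     with pytest.warns(UserWarning): ...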