Skip to content

Commit

Permalink
FIX _get_estimators_indices() in BaseBagging (scikit-learn#16437)
Browse files Browse the repository at this point in the history
  • Loading branch information
chofchof authored and gio8tisu committed May 15, 2020
1 parent 306385c commit 8d14bfa
Show file tree
Hide file tree
Showing 4 changed files with 33 additions and 5 deletions.
9 changes: 8 additions & 1 deletion doc/whats_new/v0.23.rst
Expand Up @@ -23,7 +23,8 @@ parameters, may produce different models from the previous version. This often
occurs due to changes in the modelling logic (bug fixes or enhancements), or in
random sampling procedures.

- list models here
- :class:`ensemble.BaggingClassifier`, :class:`ensemble.BaggingRegressor`,
and :class:`ensemble.IsolationForest`. |Fix|

Details are listed in the changelog below.

Expand Down Expand Up @@ -140,6 +141,12 @@ Changelog
samples in the training set. :pr:`14516` by :user:`Johann Faouzi
<johannfaouzi>`.

- |Fix| Fixed a bug in :class:`ensemble.BaggingClassifier`,
:class:`ensemble.BaggingRegressor` and :class:`ensemble.IsolationForest`
where the attribute `estimators_samples_` did not generate the proper indices
used during `fit`.
:pr:`16437` by :user:`Jin-Hwan CHO <chofchof>`.

:mod:`sklearn.feature_extraction`
.................................

Expand Down
5 changes: 2 additions & 3 deletions sklearn/ensemble/_bagging.py
Expand Up @@ -82,7 +82,7 @@ def _parallel_build_estimators(n_estimators, ensemble, X, y, sample_weight,
print("Building estimator %d of %d for this parallel run "
"(total %d)..." % (i + 1, n_estimators, total_n_estimators))

random_state = np.random.RandomState(seeds[i])
random_state = seeds[i]
estimator = ensemble._make_estimator(append=False,
random_state=random_state)

Expand Down Expand Up @@ -405,9 +405,8 @@ def _get_estimators_indices(self):
for seed in self._seeds:
# Operations accessing random_state must be performed identically
# to those in `_parallel_build_estimators()`
random_state = np.random.RandomState(seed)
feature_indices, sample_indices = _generate_bagging_indices(
random_state, self.bootstrap_features, self.bootstrap,
seed, self.bootstrap_features, self.bootstrap,
self.n_features_, self._n_samples, self._max_features,
self._max_samples)

Expand Down
22 changes: 22 additions & 0 deletions sklearn/ensemble/tests/test_bagging.py
Expand Up @@ -878,3 +878,25 @@ def test_bagging_small_max_features():
bagging = BaggingClassifier(LogisticRegression(),
max_features=0.3, random_state=1)
bagging.fit(X, y)


def test_bagging_get_estimators_indices():
# Check that Bagging estimator can generate sample indices properly
# Non-regression test for:
# https://github.com/scikit-learn/scikit-learn/issues/16436

rng = np.random.RandomState(0)
X = rng.randn(13, 4)
y = np.arange(13)

class MyEstimator(DecisionTreeRegressor):
"""An estimator which stores y indices information at fit."""
def fit(self, X, y):
self._sample_indices = y

clf = BaggingRegressor(base_estimator=MyEstimator(),
n_estimators=1, random_state=0)
clf.fit(X, y)

assert_array_equal(clf.estimators_[0]._sample_indices,
clf.estimators_samples_[0])
2 changes: 1 addition & 1 deletion sklearn/tree/_classes.py
Expand Up @@ -1668,7 +1668,7 @@ class ExtraTreeRegressor(DecisionTreeRegressor):
>>> reg = BaggingRegressor(extra_tree, random_state=0).fit(
... X_train, y_train)
>>> reg.score(X_test, y_test)
0.7788...
0.7447...
"""
def __init__(self,
criterion="mse",
Expand Down

0 comments on commit 8d14bfa

Please sign in to comment.