Skip to content

Commit 7fc3a91

Browse files
mfeurereddiebergman
andcommitted
Dummy implementation of a multi-objective ensemble. (#1523)
* Dummy implementation of a multi-objective ensemble. * Fix bug * One bugfix and suggestions from Eddie * Move single best common code into parent class * Fix docstring * Add tests + improve docs + simplify code * Factor out pareto_front into standalone function * Make the pareto set a property. By encapsulating it this way, the type checker is friendlier, and the @property is always available and will throw an error if not fitted. In contrast, a non-existent property is likely to give a much more unintuitive error such as "attribute pareto_set_ does not exist" * Add None defaults, fix indentation * Move resolve ensemble class check to init * Revert "Move resolve ensemble class check to init" This reverts commit 446b7d6. * Fix `_resolve_ensemble_class` and make it private. See the comment string for the fix * Fix variable name * Fix missing parameter names * Fix bug, update docs * Implement requested changes, fix bug Co-authored-by: eddiebergman <eddiebergmanhs@gmail.com>
1 parent 0c71134 commit 7fc3a91

File tree

214 files changed

+980
-143
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

214 files changed

+980
-143
lines changed

autosklearn/automl.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@
7979
AbstractMultiObjectiveEnsemble,
8080
)
8181
from autosklearn.ensembles.ensemble_selection import EnsembleSelection
82-
from autosklearn.ensembles.singlebest_ensemble import SingleBest
82+
from autosklearn.ensembles.singlebest_ensemble import SingleBestFromRunhistory
8383
from autosklearn.evaluation import ExecuteTaFuncWithQueue, get_cost_of_crash
8484
from autosklearn.evaluation.abstract_evaluator import _fit_and_suppress_warnings
8585
from autosklearn.evaluation.train_evaluator import TrainEvaluator, _fit_with_budget
@@ -1638,7 +1638,7 @@ def _load_best_individual_model(self):
16381638
return None
16391639

16401640
# SingleBest contains the best model found by AutoML
1641-
ensemble = SingleBest(
1641+
ensemble = SingleBestFromRunhistory(
16421642
metrics=self._metrics,
16431643
task_type=self._task,
16441644
seed=self._seed,
@@ -1663,13 +1663,12 @@ def _load_pareto_set(self) -> Sequence[VotingClassifier | VotingRegressor]:
16631663
raise ValueError("Pareto set only available if ensemble can be loaded.")
16641664

16651665
if isinstance(self.ensemble_, AbstractMultiObjectiveEnsemble):
1666-
pareto_set = self.ensemble_.get_pareto_set()
1666+
pareto_set = self.ensemble_.pareto_set
16671667
else:
16681668
self._logger.warning(
16691669
"Pareto set not available for single objective ensemble "
16701670
"method. The Pareto set will only include the single ensemble "
1671-
"constructed by %s",
1672-
type(self.ensemble_),
1671+
f"constructed by {type(self.ensemble_)},"
16731672
)
16741673
pareto_set = [self.ensemble_]
16751674

autosklearn/ensembles/__init__.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,18 @@
11
from .abstract_ensemble import AbstractEnsemble, AbstractMultiObjectiveEnsemble
22
from .ensemble_selection import EnsembleSelection
3-
from .singlebest_ensemble import SingleBest
3+
from .multiobjective_dummy_ensemble import MultiObjectiveDummyEnsemble
4+
from .singlebest_ensemble import (
5+
SingleBest,
6+
SingleBestFromRunhistory,
7+
SingleModelEnsemble,
8+
)
49

510
__all__ = [
611
"AbstractEnsemble",
712
"AbstractMultiObjectiveEnsemble",
813
"EnsembleSelection",
14+
"SingleBestFromRunhistory",
915
"SingleBest",
16+
"SingleModelEnsemble",
17+
"MultiObjectiveDummyEnsemble",
1018
]

autosklearn/ensembles/abstract_ensemble.py

Lines changed: 33 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
from __future__ import annotations
22

33
from abc import ABC, abstractmethod
4-
from typing import Dict, List, Sequence, Tuple, Union
4+
from typing import Any, Dict, List, Sequence, Tuple, Union
55

66
import numpy as np
77

@@ -18,19 +18,29 @@ def __init__(
1818
self,
1919
task_type: int,
2020
metrics: Sequence[Scorer] | Scorer,
21-
random_state: int | np.random.RandomState | None,
2221
backend: Backend,
22+
random_state: int | np.random.RandomState | None = None,
2323
):
2424
pass
2525

26+
def __getstate__(self) -> Dict[str, Any]:
27+
# Cannot serialize a metric if
28+
# it is user defined.
29+
# That is, if doing pickle dump
30+
# the metric won't be the same as the
31+
# one in __main__. we don't use the metric
32+
# in the EnsembleSelection so this should
33+
# be fine
34+
return {key: value for key, value in self.__dict__.items() if key != "metrics"}
35+
2636
@abstractmethod
2737
def fit(
2838
self,
2939
base_models_predictions: np.ndarray | List[np.ndarray],
30-
X_data: SUPPORTED_FEAT_TYPES,
3140
true_targets: np.ndarray,
3241
model_identifiers: List[Tuple[int, int, float]],
3342
runs: Sequence[Run],
43+
X_data: SUPPORTED_FEAT_TYPES | None = None,
3444
) -> "AbstractEnsemble":
3545
"""Fit an ensemble given predictions of base models and targets.
3646
@@ -79,7 +89,7 @@ def predict(
7989
8090
Returns
8191
-------
82-
array : [n_data_points]
92+
np.ndarray
8393
"""
8494
pass
8595

@@ -97,7 +107,7 @@ def get_models_with_weights(
97107
98108
Returns
99109
-------
100-
array : [(weight_1, model_1), ..., (weight_n, model_n)]
110+
List[Tuple[float, BasePipeline]]
101111
"""
102112

103113
@abstractmethod
@@ -115,7 +125,7 @@ def get_identifiers_with_weights(
115125
116126
Returns
117127
-------
118-
array : [(identifier_1, weight_1), ..., (identifier_n, weight_n)]
128+
List[Tuple[Tuple[int, int, float], float]]
119129
"""
120130

121131
@abstractmethod
@@ -133,12 +143,25 @@ def get_selected_model_identifiers(self) -> List[Tuple[int, int, float]]:
133143
def get_validation_performance(self) -> float:
134144
"""Return validation performance of ensemble.
135145
136-
Return
137-
------
146+
Returns
147+
-------
138148
float
139149
"""
140150

141151

142152
class AbstractMultiObjectiveEnsemble(AbstractEnsemble):
143-
def get_pareto_set(self) -> Sequence[AbstractEnsemble]:
144-
pass
153+
@property
154+
@abstractmethod
155+
def pareto_set(self) -> Sequence[AbstractEnsemble]:
156+
"""Get a sequence of ensembles that are on the pareto front
157+
158+
Raises
159+
------
160+
SklearnNotFittedError
161+
If ``fit`` has not been called and the pareto set does not exist yet
162+
163+
Returns
164+
-------
165+
Sequence[AbstractEnsemble]
166+
"""
167+
...

autosklearn/ensembles/ensemble_selection.py

Lines changed: 20 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
from __future__ import annotations
22

3-
from typing import Any, Dict, List, Optional, Sequence, Tuple, Union
3+
from typing import Dict, List, Sequence, Tuple, Union
44

55
import random
66
import warnings
@@ -23,11 +23,11 @@ def __init__(
2323
self,
2424
task_type: int,
2525
metrics: Sequence[Scorer] | Scorer,
26-
random_state: Optional[Union[int, np.random.RandomState]],
2726
backend: Backend,
2827
ensemble_size: int = 50,
2928
bagging: bool = False,
3029
mode: str = "fast",
30+
random_state: int | np.random.RandomState | None = None,
3131
) -> None:
3232
"""An ensemble of selected algorithms
3333
@@ -43,14 +43,6 @@ def __init__(
4343
The metric used to evaluate the models. If multiple metrics are passed,
4444
ensemble selection only optimizes for the first
4545
46-
random_state: Optional[int | RandomState] = None
47-
The random_state used for ensemble selection.
48-
49-
* None - Uses numpy's default RandomState object
50-
* int - Successive calls to fit will produce the same results
51-
* RandomState - Truly random, each call to fit will produce
52-
different results, even with the same object.
53-
5446
backend : Backend
5547
Gives access to the backend of Auto-sklearn. Not used by Ensemble Selection.
5648
@@ -62,6 +54,14 @@ def __init__(
6254
* 'slow' - The original method used in Rich Caruana's ensemble selection.
6355
* 'fast' - A faster version of Rich Caruanas' ensemble selection.
6456
57+
random_state: int | RandomState | None = None
58+
The random_state used for ensemble selection.
59+
60+
* None - Uses numpy's default RandomState object
61+
* int - Successive calls to fit will produce the same results
62+
* RandomState - Truly random, each call to fit will produce
63+
different results, even with the same object.
64+
6565
References
6666
----------
6767
| Ensemble selection from libraries of models
@@ -92,23 +92,13 @@ def __init__(
9292
# https://scikit-learn.org/stable/common_pitfalls.html#controlling-randomness
9393
self.random_state = random_state
9494

95-
def __getstate__(self) -> Dict[str, Any]:
96-
# Cannot serialize a metric if
97-
# it is user defined.
98-
# That is, if doing pickle dump
99-
# the metric won't be the same as the
100-
# one in __main__. we don't use the metric
101-
# in the EnsembleSelection so this should
102-
# be fine
103-
return {key: value for key, value in self.__dict__.items() if key != "metrics"}
104-
10595
def fit(
10696
self,
10797
base_models_predictions: List[np.ndarray],
108-
X_data: SUPPORTED_FEAT_TYPES,
10998
true_targets: np.ndarray,
11099
model_identifiers: List[Tuple[int, int, float]],
111100
runs: Sequence[Run],
101+
X_data: SUPPORTED_FEAT_TYPES | None = None,
112102
) -> EnsembleSelection:
113103
self.ensemble_size = int(self.ensemble_size)
114104
if self.ensemble_size < 1:
@@ -141,20 +131,22 @@ def fit(
141131
def _fit(
142132
self,
143133
predictions: List[np.ndarray],
144-
X_data: SUPPORTED_FEAT_TYPES,
145134
labels: np.ndarray,
135+
*,
136+
X_data: SUPPORTED_FEAT_TYPES | None = None,
146137
) -> EnsembleSelection:
147138
if self.mode == "fast":
148-
self._fast(predictions, X_data, labels)
139+
self._fast(predictions=predictions, X_data=X_data, labels=labels)
149140
else:
150-
self._slow(predictions, X_data, labels)
141+
self._slow(predictions=predictions, X_data=X_data, labels=labels)
151142
return self
152143

153144
def _fast(
154145
self,
155146
predictions: List[np.ndarray],
156-
X_data: SUPPORTED_FEAT_TYPES,
157147
labels: np.ndarray,
148+
*,
149+
X_data: SUPPORTED_FEAT_TYPES | None = None,
158150
) -> None:
159151
"""Fast version of Rich Caruana's ensemble selection method."""
160152
self.num_input_models_ = len(predictions)
@@ -231,8 +223,9 @@ def _fast(
231223
def _slow(
232224
self,
233225
predictions: List[np.ndarray],
234-
X_data: SUPPORTED_FEAT_TYPES,
235226
labels: np.ndarray,
227+
*,
228+
X_data: SUPPORTED_FEAT_TYPES | None = None,
236229
) -> None:
237230
"""Rich Caruana's ensemble selection method."""
238231
self.num_input_models_ = len(predictions)
@@ -311,7 +304,7 @@ def _bagging(
311304
# Bagging a set of models
312305
indices = sorted(random.sample(range(0, n_models), bag_size))
313306
bag = predictions[indices, :, :]
314-
order, _ = self._fit(bag, labels)
307+
order, _ = self._fit(predictions=bag, labels=labels)
315308
order_of_each_bag.append(order)
316309

317310
return np.array(

0 commit comments

Comments
 (0)