25 changes: 15 additions & 10 deletions extension_templates/experiments.py
@@ -75,7 +75,11 @@ class MyExperiment(BaseExperiment):
#
"property:randomness": "random",
# valid values: "random", "deterministic"
# if "deterministic", two calls of score must result in the same value
# if "deterministic", two calls of "evaluate" must result in the same value
#
"property:higher_or_lower_is_better": "lower",
# valid values: "higher", "lower", "mixed"
Collaborator: What is mixed? It is not handled by the cost method.

Collaborator (Author):
in theory there could be an experiment that is neither "lower is better" nor "higher is better" - an example would be a metric such as "empirical coverage of 90% nominal prediction intervals", where the best value is 90% (not 0% or 100%)
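To illustrate the "mixed" case in the reply above, a minimal sketch of such a metric (editor's illustration, not part of the PR; the helper name empirical_coverage is hypothetical):

import numpy as np

def empirical_coverage(y_true, lower, upper):
    # fraction of observations falling inside the prediction interval
    return np.mean((y_true >= lower) & (y_true <= upper))

# for 90% nominal intervals the ideal coverage is 0.9, so neither raw
# maximization nor minimization is appropriate; to use such a metric with
# the "higher"/"lower" machinery one would evaluate e.g. -abs(coverage - 0.9)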

# whether higher or lower returns of "evaluate" are better
#
# --------------
# packaging info
@@ -147,25 +151,25 @@ def _paramnames(self):
return ["score_param1", "score_param2"]

# todo: implement this, mandatory
def _score(self, params):
"""Score the parameters.
def _evaluate(self, params):
"""Evaluate the parameters.

Parameters
----------
params : dict with string keys
Parameters to score.
Parameters to evaluate.

Returns
-------
float
The score of the parameters.
The value of the parameters as per evaluation.
dict
Additional metadata about the search.
"""
# params is a dictionary with keys being paramnames or subset thereof
# IMPORTANT: avoid side effects to params!
#
# the method may work if only a subste of the parameters in paramnames is passed
# the method may work if only a subset of the parameters in paramnames is passed
# but this is not necessary
value = 42 # must be numpy.float64
metadata = {"some": "metadata"} # can be any dict
@@ -230,18 +234,19 @@ def get_test_params(cls, parameter_set="default"):

@classmethod
def _get_score_params(self):
"""Return settings for testing the score function. Used in tests only.
"""Return settings for testing score/evaluate functions. Used in tests only.

Returns a list, the i-th element corresponds to self.get_test_params()[i].
It should be a valid call for self.score.
Returns a list, the i-th element should be valid arguments for
self.evaluate and self.score, of an instance constructed with
self.get_test_params()[i].

Returns
-------
list of dict
The parameters to be used for scoring.
"""
# dict keys should be same as paramnames return
# or subset, only if _score allows for subsets of parameters
# or subset, only if _evaluate allows for subsets of parameters
score_params1 = {"score_param1": "foo", "score_param2": "bar"}
score_params2 = {"score_param1": "baz", "score_param2": "qux"}
return [score_params1, score_params2]
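As a concrete illustration of the extension template above, a minimal sketch of a filled-in experiment (editor's illustration; the class name ParabolaExperiment and the import path are assumptions, not part of the PR):

import numpy as np

from hyperactive.base import BaseExperiment  # import path assumed


class ParabolaExperiment(BaseExperiment):
    # toy experiment where lower values of (x - 3) ** 2 + y ** 2 are better

    _tags = {
        "property:randomness": "deterministic",
        "property:higher_or_lower_is_better": "lower",
    }

    def _paramnames(self):
        return ["x", "y"]

    def _evaluate(self, params):
        value = (params["x"] - 3) ** 2 + params["y"] ** 2
        return np.float64(value), {"note": "toy metadata"}


exp = ParabolaExperiment()
value, _ = exp.evaluate({"x": 1.0, "y": 2.0})  # 8.0, raw evaluation
score, _ = exp.score({"x": 1.0, "y": 2.0})     # -8.0, sign flipped since lower is better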
10 changes: 8 additions & 2 deletions extension_templates/optimizers.py
@@ -142,7 +142,8 @@ def _paramnames(self):
return ["score_param1", "score_param2"]

# optional: implement this to prepare arguments for _run
# the default is all parameters passed to __init__, except ex
# the default is all parameters passed to __init__, minus the experiment
# the result of this is passed to _run as search_config
def get_search_config(self):
"""Get the search configuration.

@@ -153,12 +154,15 @@ def get_search_config(self):
"""
# the default
search_config = super().get_search_config()
# example of adding a new parameter to the search config
# this is optional, but can be useful for clean separation or API interfacing
search_config["one_more_param"] = 42
# this return is available in _run as search_config
return search_config

# todo: implement this, mandatory
def _run(self, experiment, **search_config):
"""Run the optimization search process.
"""Run the optimization search process to maximize the experiment's score.

Parameters
----------
@@ -173,6 +177,8 @@ def _run(self, experiment, **search_config):
The best parameters found during the search.
Must have keys a subset or identical to experiment.paramnames().
"""
# important: the search logic should *maximize* the experiment's score
# this is the main method to implement, it should return the best parameters
best_params = {"write_some_logic_to_get": "best_params"}
return best_params

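For contrast with the placeholder best_params above, a minimal sketch of the kind of logic _run could contain, written here as a free function (editor's illustration; the uniform (0, 1) sampling and n_iter are assumptions, not part of the PR):

import random

def run_random_search(experiment, n_iter=100):
    # sketch of what an optimizer's _run could do: sample candidates and
    # keep the one with the highest experiment.score (higher is always better)
    paramnames = experiment.paramnames()
    best_params, best_score = None, float("-inf")
    for _ in range(n_iter):
        params = {name: random.random() for name in paramnames}  # toy search space (0, 1)
        score, _ = experiment.score(params)
        if score > best_score:
            best_params, best_score = params, score
    return best_params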
55 changes: 46 additions & 9 deletions src/hyperactive/base/_experiment.py
@@ -14,13 +14,15 @@ class BaseExperiment(BaseObject):
"property:randomness": "random", # random or deterministic
# if deterministic, two calls of score will result in the same value
# random = two calls may result in different values; same as "stochastic"
"property:higher_or_lower_is_better": "higher", # "higher", "lower", "mixed"
# whether higher or lower scores are better
}

def __init__(self):
super().__init__()

def __call__(self, **kwargs):
"""Score parameters, with kwargs call."""
"""Score parameters, with kwargs call. Same as score call."""
score, _ = self.score(kwargs)
return score

@@ -48,30 +50,55 @@ def _paramnames(self):
"""
raise NotImplementedError

def score(self, params):
"""Score the parameters.
def evaluate(self, params):
"""Evaluate the parameters.

Parameters
----------
params : dict with string keys
Parameters to score.
Parameters to evaluate.

Returns
-------
float
The score of the parameters.
The value of the parameters as per evaluation.
dict
Additional metadata about the search.
"""
paramnames = self.paramnames()
if not set(params.keys()) <= set(paramnames):
raise ValueError("Parameters do not match.")
res, metadata = self._score(params)
res, metadata = self._evaluate(params)
res = np.float64(res)
return res, metadata

def _score(self, params):
"""Score the parameters.
def _evaluate(self, params):
"""Evaluate the parameters.

Parameters
----------
params : dict with string keys
Parameters to evaluate.

Returns
-------
float
The value of the parameters as per evaluation.
dict
Additional metadata about the search.
"""
raise NotImplementedError

def score(self, params):
"""Score the parameters - with sign such that higher is always better.

Same as ``evaluate`` call except for the sign chosen so that higher is better.

If the tag ``property:higher_or_lower_is_better`` is set to
``"lower"``, the result is ``-self.evaluate(params)``.

If the tag is set to ``"higher"``, the result is
identical to ``self.evaluate(params)``.

Parameters
----------
Expand All @@ -85,4 +112,14 @@ def _score(self, params):
dict
Additional metadata about the search.
"""
raise NotImplementedError
hib = self.get_tag("property:higher_or_lower_is_better", "lower")
if hib == "higher":
sign = 1
elif hib == "lower":
sign = -1

eval_res = self.evaluate(params)
value = eval_res[0]
metadata = eval_res[1]

return sign * value, metadata
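A short usage note on the sign convention (editor's illustration, reusing the hypothetical ParabolaExperiment sketched earlier, whose tag is "lower"):

exp = ParabolaExperiment()                            # evaluate returns (x - 3) ** 2 + y ** 2
assert exp.score({"x": 1.0, "y": 2.0})[0] == -8.0     # evaluate gives 8.0, sign flipped
assert exp(x=1.0, y=2.0) == -8.0                      # __call__ is the kwargs form of score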
13 changes: 12 additions & 1 deletion src/hyperactive/base/_optimizer.py
@@ -52,12 +52,23 @@ def get_experiment(self):
return self._experiment

def run(self):
"""Run the optimization search process.
"""Run the optimization search process to maximize the experiment's score.

The optimization searches for a maximizer of the experiment's
``score`` method.

Depending on the tag ``property:higher_or_lower_is_better`` being
set to ``higher`` or ``lower``, the ``run`` method will search for:

* the minimizer of the ``evaluate`` method if the tag is ``lower``
* the maximizer of the ``evaluate`` method if the tag is ``higher``

Returns
-------
best_params : dict
The best parameters found during the optimization process.
The dict ``best_params`` can be used in ``experiment.score`` or
``experiment.evaluate`` directly.
"""
experiment = self.get_experiment()
search_config = self.get_search_config()
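A short end-to-end note on the contract above (editor's illustration, reusing the hypothetical ParabolaExperiment and run_random_search sketches; a concrete optimizer's run() would play the role of run_random_search here):

exp = ParabolaExperiment()
best_params = run_random_search(exp, n_iter=200)  # stands in for optimizer.run()
value, _ = exp.evaluate(best_params)              # best_params plugs into evaluate...
score, _ = exp.score(best_params)                 # ...and into score, where higher is better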
99 changes: 92 additions & 7 deletions src/hyperactive/experiment/integrations/sklearn_cv.py
@@ -110,6 +110,13 @@ def __init__(self, estimator, X, y, scoring=None, cv=None):
self._scoring = make_scorer(scoring)
self.scorer_ = self._scoring

# Set the sign of the scoring function
if hasattr(self._scoring, "_score"):
score_func = self._scoring._score_func
_sign = _guess_sign_of_sklmetric(score_func)
_sign_str = "higher" if _sign == 1 else "lower"
self.set_tags(**{"property:higher_or_lower_is_better": _sign_str})

def _paramnames(self):
"""Return the parameter names of the search.

@@ -120,18 +127,18 @@ def _paramnames(self):
"""
return list(self.estimator.get_params().keys())

def _score(self, params):
"""Score the parameters.
def _evaluate(self, params):
"""Evaluate the parameters.

Parameters
----------
params : dict with string keys
Parameters to score.
Parameters to evaluate.

Returns
-------
float
The score of the parameters.
The value of the parameters as per evaluation.
dict
Additional metadata about the search.
"""
@@ -221,10 +228,11 @@ def get_test_params(cls, parameter_set="default"):

@classmethod
def _get_score_params(self):
"""Return settings for testing the score function. Used in tests only.
"""Return settings for testing score/evaluate functions. Used in tests only.

Returns a list, the i-th element corresponds to self.get_test_params()[i].
It should be a valid call for self.score.
Returns a list, the i-th element should be valid arguments for
self.evaluate and self.score, of an instance constructed with
self.get_test_params()[i].

Returns
-------
@@ -235,3 +243,80 @@ def _get_score_params(self):
score_params_regress = {"C": 1.0, "kernel": "linear"}
score_params_defaults = {"C": 1.0, "kernel": "linear"}
return [score_params_classif, score_params_regress, score_params_defaults]


def _guess_sign_of_sklmetric(scorer):
"""Guess the sign of a sklearn metric scorer.

Parameters
----------
scorer : callable
The sklearn metric scorer to guess the sign for.

Returns
-------
int
1 if higher scores are better, -1 if lower scores are better.
"""
HIGHER_IS_BETTER = {
# Classification
"accuracy_score": True,
"auc": True,
"average_precision_score": True,
"balanced_accuracy_score": True,
"brier_score_loss": False,
"class_likelihood_ratios": False,
"cohen_kappa_score": True,
"d2_log_loss_score": True,
"dcg_score": True,
"f1_score": True,
"fbeta_score": True,
"hamming_loss": False,
"hinge_loss": False,
"jaccard_score": True,
"log_loss": False,
"matthews_corrcoef": True,
"ndcg_score": True,
"precision_score": True,
"recall_score": True,
"roc_auc_score": True,
"top_k_accuracy_score": True,
"zero_one_loss": False,

# Regression
"d2_absolute_error_score": True,
"d2_pinball_score": True,
"d2_tweedie_score": True,
"explained_variance_score": True,
"max_error": False,
"mean_absolute_error": False,
"mean_absolute_percentage_error": False,
"mean_gamma_deviance": False,
"mean_pinball_loss": False,
"mean_poisson_deviance": False,
"mean_squared_error": False,
"mean_squared_log_error": False,
"mean_tweedie_deviance": False,
"median_absolute_error": False,
"r2_score": True,
"root_mean_squared_error": False,
"root_mean_squared_log_error": False,
}

scorer_name = getattr(scorer, "__name__", None)

if hasattr(scorer, "greater_is_better"):
return 1 if scorer.greater_is_better else -1
elif scorer_name in HIGHER_IS_BETTER:
return 1 if HIGHER_IS_BETTER[scorer_name] else -1
elif scorer_name.endswith("_score"):
# If the scorer name ends with "_score", we assume higher is better
return 1
elif scorer_name.endswith("_loss") or scorer_name.endswith("_deviance"):
# If the scorer name ends with "_loss", we assume lower is better
return -1
elif scorer_name.endswith("_error"):
return -1
else:
# If we cannot determine the sign, we assume lower is better
return -1
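A short usage sketch of the helper as defined above (editor's illustration; the metrics are standard sklearn metric functions):

from sklearn.metrics import accuracy_score, adjusted_rand_score, mean_squared_error

_guess_sign_of_sklmetric(accuracy_score)       # 1, listed as higher-is-better
_guess_sign_of_sklmetric(mean_squared_error)   # -1, listed as lower-is-better
_guess_sign_of_sklmetric(adjusted_rand_score)  # 1, not in the table, "_score" suffix rule applies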