31 changes: 30 additions & 1 deletion autosklearn/automl.py
@@ -128,6 +128,7 @@ def __init__(self,
smac_scenario_args=None,
logging_config=None,
metric=None,
scoring_functions=None
):
super(AutoML, self).__init__()
self._backend = backend
@@ -149,6 +150,7 @@ def __init__(self,
self._include_preprocessors = include_preprocessors
self._exclude_preprocessors = exclude_preprocessors
self._resampling_strategy = resampling_strategy
self._scoring_functions = scoring_functions if scoring_functions is not None else []
self._resampling_strategy_arguments = resampling_strategy_arguments \
if resampling_strategy_arguments is not None else {}
if self._resampling_strategy not in ['holdout',
@@ -715,6 +717,7 @@ def fit(
disable_file_output=self._disable_evaluator_output,
get_smac_object_callback=self._get_smac_object_callback,
smac_scenario_args=self._smac_scenario_args,
scoring_functions=self._scoring_functions,
ensemble_callback=proc_ensemble,
)

@@ -1038,7 +1041,7 @@ def score(self, X, y):
prediction=prediction,
task_type=self._task,
metric=self._metric,
all_scoring_functions=False)
scoring_functions=None)

@property
def cv_results_(self):
@@ -1073,11 +1076,21 @@ def cv_results_(self):
masks[name] = []
hp_names.append(name)

metric_mask = dict()
metric_dict = dict()
metric_name = []

for metric in self._scoring_functions:
metric_name.append(metric.name)
metric_dict[metric.name] = []
metric_mask[metric.name] = []

mean_test_score = []
mean_fit_time = []
params = []
status = []
budgets = []

for run_key in self.runhistory_.data:
run_value = self.runhistory_.data[run_key]
config_id = run_key.config_id
@@ -1120,7 +1133,23 @@ def cv_results_(self):
parameter_dictionaries[hp_name].append(hp_value)
masks[hp_name].append(mask_value)

for metric in self._scoring_functions:
if metric.name in run_value.additional_info.keys():
metric_cost = run_value.additional_info[metric.name]
metric_value = metric._optimum - (metric._sign * metric_cost)
mask_value = False
else:
metric_value = np.NaN
mask_value = True
metric_dict[metric.name].append(metric_value)
metric_mask[metric.name].append(mask_value)

results['mean_test_score'] = np.array(mean_test_score)
for name in metric_name:
masked_array = ma.MaskedArray(metric_dict[name],
metric_mask[name])
results['metric_%s' % name] = masked_array

results['mean_fit_time'] = np.array(mean_fit_time)
results['params'] = params
results['rank_test_scores'] = scipy.stats.rankdata(1 - results['mean_test_score'],
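Note on the cv_results_ hunks above: each extra metric's value is recovered from the cost stored in the run's additional_info via metric._optimum - (metric._sign * metric_cost), and runs that never reported the metric become masked entries. A minimal, illustrative sketch of that conversion (the accuracy-style optimum/sign and the numbers are assumptions, not values from the PR):

import numpy as np
import numpy.ma as ma

def cost_to_score(optimum, sign, cost):
    # Mirrors `metric._optimum - (metric._sign * metric_cost)` in cv_results_.
    return optimum - sign * cost

# Accuracy-like metric: optimum = 1, sign = +1, stored cost 0.12 -> score 0.88
print(cost_to_score(1.0, 1.0, 0.12))  # 0.88

# Runs without the metric in additional_info are masked, as with metric_mask above.
print(ma.MaskedArray([0.88, np.nan], [False, True]))
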
8 changes: 4 additions & 4 deletions autosklearn/ensemble_builder.py
@@ -874,7 +874,7 @@ def score_ensemble_preds(self):
prediction=y_ensemble,
task_type=self.task_type,
metric=self.metric,
all_scoring_functions=False)
scoring_functions=None)

if np.isfinite(self.read_scores[y_ens_fn]["ens_score"]):
self.logger.debug(
@@ -1349,7 +1349,7 @@ def _add_ensemble_trajectory(self, train_pred, valid_pred, test_pred):
prediction=train_pred,
task_type=self.task_type,
metric=self.metric,
all_scoring_functions=False
scoring_functions=None
)
}
if valid_pred is not None:
@@ -1360,7 +1360,7 @@ def _add_ensemble_trajectory(self, train_pred, valid_pred, test_pred):
prediction=valid_pred,
task_type=self.task_type,
metric=self.metric,
all_scoring_functions=False
scoring_functions=None
)

# In case test_pred was provided
@@ -1370,7 +1370,7 @@ def _add_ensemble_trajectory(self, train_pred, valid_pred, test_pred):
prediction=test_pred,
task_type=self.task_type,
metric=self.metric,
all_scoring_functions=False
scoring_functions=None
)

self.ensemble_history.append(performance_stamp)
8 changes: 4 additions & 4 deletions autosklearn/ensembles/ensemble_selection.py
@@ -143,15 +143,15 @@ def _fast(
)

# Calculate score is versatile and can return a dict of score
# when all_scoring_functions=False, we know it will be a float
# when scoring_functions=None, we know it will be a float
calculated_score = cast(
float,
calculate_score(
solution=labels,
prediction=fant_ensemble_prediction,
task_type=self.task_type,
metric=self.metric,
all_scoring_functions=False
scoring_functions=None
)
)
scores[j] = self.metric._optimum - calculated_score
@@ -193,15 +193,15 @@ def _slow(
ensemble.append(pred)
ensemble_prediction = np.mean(np.array(ensemble), axis=0)
# Calculate score is versatile and can return a dict of score
# when all_scoring_functions=False, we know it will be a float
# when scoring_functions=None, we know it will be a float
calculated_score = cast(
float,
calculate_score(
solution=labels,
prediction=ensemble_prediction,
task_type=self.task_type,
metric=self.metric,
all_scoring_functions=False
scoring_functions=None
)
)
scores[j] = self.metric._optimum - calculated_score
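The comments in _fast and _slow rely on the convention that calculate_score returns a plain float when scoring_functions is None and a dict of per-metric scores otherwise, which is why the result is cast to float before the loss is computed. A toy stand-in for that contract (simplified signature, not the real autosklearn function):

from typing import Dict, Union, cast

def calculate_score_stub(scoring_functions=None) -> Union[float, Dict[str, float]]:
    # Stand-in only: single float without extra scorers, dict with them.
    if scoring_functions:
        return {"accuracy": 0.90, "balanced_accuracy": 0.85}
    return 0.90

score = cast(float, calculate_score_stub(scoring_functions=None))
loss = 1.0 - score  # metric._optimum - calculated_score, assuming optimum == 1
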
12 changes: 10 additions & 2 deletions autosklearn/estimators.py
@@ -1,6 +1,6 @@
# -*- encoding: utf-8 -*-

from typing import Optional, Dict
from typing import Optional, Dict, List

import dask.distributed
import joblib
@@ -9,6 +9,7 @@
from sklearn.utils.multiclass import type_of_target

from autosklearn.automl import AutoMLClassifier, AutoMLRegressor, AutoML
from autosklearn.metrics import Scorer
from autosklearn.util.backend import create


@@ -42,6 +43,7 @@ def __init__(
logging_config=None,
metadata_directory=None,
metric=None,
scoring_functions: Optional[List[Scorer]] = None,
load_models: bool = True,
):
"""
@@ -218,6 +220,10 @@ def __init__(
Metrics`_.
If None is provided, a default metric is selected depending on the task.

scoring_functions : List[Scorer], optional (None)
List of scorers which will be calculated for each pipeline; the results
are available via ``cv_results_``

load_models : bool, optional (True)
Whether to load the models after fitting Auto-sklearn.

@@ -261,6 +267,7 @@ def __init__(
self.logging_config = logging_config
self.metadata_directory = metadata_directory
self._metric = metric
self._scoring_functions = scoring_functions
self._load_models = load_models

self.automl_ = None # type: Optional[AutoML]
@@ -316,7 +323,8 @@ def build_automl(
smac_scenario_args=smac_scenario_args,
logging_config=self.logging_config,
metadata_directory=self.metadata_directory,
metric=self._metric
metric=self._metric,
scoring_functions=self._scoring_functions
)

return automl
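To see how the new estimator argument is meant to be used end to end, a rough sketch (dataset, time budget, and metric choices are illustrative assumptions, not part of this diff):

import sklearn.datasets
import autosklearn.classification
import autosklearn.metrics

X, y = sklearn.datasets.load_breast_cancer(return_X_y=True)

automl = autosklearn.classification.AutoSklearnClassifier(
    time_left_for_this_task=60,
    scoring_functions=[autosklearn.metrics.accuracy,
                       autosklearn.metrics.balanced_accuracy],
)
automl.fit(X, y)

# Each requested scorer appears as a masked 'metric_<name>' column.
print(automl.cv_results_['metric_accuracy'])
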
6 changes: 3 additions & 3 deletions autosklearn/evaluation/__init__.py
@@ -98,7 +98,7 @@ class ExecuteTaFuncWithQueue(AbstractTAFunc):
def __init__(self, backend, autosklearn_seed, resampling_strategy, metric,
cost_for_crash, abort_on_first_run_crash,
initial_num_run=1, stats=None,
run_obj='quality', par_factor=1, all_scoring_functions=False,
run_obj='quality', par_factor=1, scoring_functions=None,
output_y_hat_optimization=True, include=None, exclude=None,
memory_limit=None, disable_file_output=False, init_params=None,
budget_type=None, ta=False, pynisher_context='spawn', **resampling_strategy_args):
@@ -152,7 +152,7 @@ def __init__(self, backend, autosklearn_seed, resampling_strategy, metric,
self.metric = metric
self.resampling_strategy = resampling_strategy
self.resampling_strategy_args = resampling_strategy_args
self.all_scoring_functions = all_scoring_functions
self.scoring_functions = scoring_functions
# TODO deactivate output_y_hat_optimization and let the respective evaluator decide
self.output_y_hat_optimization = output_y_hat_optimization
self.include = include
@@ -274,7 +274,7 @@ def run(
metric=self.metric,
seed=self.autosklearn_seed,
num_run=num_run,
all_scoring_functions=self.all_scoring_functions,
scoring_functions=self.scoring_functions,
output_y_hat_optimization=self.output_y_hat_optimization,
include=self.include,
exclude=self.exclude,
20 changes: 10 additions & 10 deletions autosklearn/evaluation/abstract_evaluator.py
@@ -110,7 +110,7 @@ def send_warnings_to_log(message, category, filename, lineno,
class AbstractEvaluator(object):
def __init__(self, backend, queue, metric,
configuration=None,
all_scoring_functions=False,
scoring_functions=None,
seed=1,
output_y_hat_optimization=True,
num_run=None,
@@ -141,7 +141,7 @@ def __init__(self, backend, queue, metric,
self.seed = seed

self.output_y_hat_optimization = output_y_hat_optimization
self.all_scoring_functions = all_scoring_functions
self.scoring_functions = scoring_functions

if isinstance(disable_file_output, (bool, list)):
self.disable_file_output = disable_file_output
@@ -221,7 +221,7 @@ def _get_model(self):
init_params=self._init_params)
return model

def _loss(self, y_true, y_hat, all_scoring_functions=None):
def _loss(self, y_true, y_hat, scoring_functions=None):
"""Auto-sklearn follows a minimization goal, so the make_scorer
sign is used as a guide to obtain the value to reduce.

@@ -233,20 +233,20 @@ def _loss(self, y_true, y_hat, scoring_functions=None):
For accuracy for example: optimum(1) - (+1 * actual score)
For logloss for example: optimum(0) - (-1 * actual score)
"""
all_scoring_functions = (
self.all_scoring_functions
if all_scoring_functions is None
else all_scoring_functions
scoring_functions = (
self.scoring_functions
if scoring_functions is None
else scoring_functions
)
if not isinstance(self.configuration, Configuration):
if all_scoring_functions:
return {self.metric: 1.0}
if scoring_functions:
return {self.metric.name: 1.0}
else:
return 1.0

score = calculate_score(
y_true, y_hat, self.task_type, self.metric,
all_scoring_functions=all_scoring_functions)
scoring_functions=scoring_functions)

if hasattr(score, '__len__'):
# TODO: instead of using self.metric, it should use all metrics given by key.
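The _loss docstring above states the convention loss = optimum - (sign * actual score), so SMAC always minimizes. Two worked numbers under that convention (the scores themselves are made up):

def to_loss(optimum, sign, actual_score):
    # Smaller is better, regardless of whether the metric is maximized or minimized.
    return optimum - sign * actual_score

assert abs(to_loss(1, +1, 0.92) - 0.08) < 1e-12  # accuracy: 1 - (+1 * 0.92)
assert abs(to_loss(0, -1, 0.35) - 0.35) < 1e-12  # log loss: 0 - (-1 * 0.35)
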
12 changes: 6 additions & 6 deletions autosklearn/evaluation/test_evaluator.py
@@ -19,7 +19,7 @@ class TestEvaluator(AbstractEvaluator):

def __init__(self, backend, queue, metric,
configuration=None,
all_scoring_functions=False,
scoring_functions=None,
seed=1,
include=None,
exclude=None,
@@ -30,7 +30,7 @@ def __init__(self, backend, queue, metric,
queue=queue,
configuration=configuration,
metric=metric,
all_scoring_functions=all_scoring_functions,
scoring_functions=scoring_functions,
seed=seed,
output_y_hat_optimization=False,
num_run=-1,
@@ -74,7 +74,7 @@ def predict_and_loss(self, train=False):
prediction=Y_pred,
task_type=self.task_type,
metric=self.metric,
all_scoring_functions=self.all_scoring_functions)
scoring_functions=self.scoring_functions)
else:
Y_pred = self.predict_function(self.X_test, self.model,
self.task_type, self.Y_train)
@@ -83,7 +83,7 @@ def predict_and_loss(self, train=False):
prediction=Y_pred,
task_type=self.task_type,
metric=self.metric,
all_scoring_functions=self.all_scoring_functions)
scoring_functions=self.scoring_functions)

if hasattr(score, '__len__'):
if self.task_type in CLASSIFICATION_TASKS:
@@ -101,13 +101,13 @@ def predict_and_loss(self, train=False):
# create closure for evaluating an algorithm
# Has a stupid name so pytest doesn't regard it as a test
def eval_t(queue, config, backend, metric, seed, num_run, instance,
all_scoring_functions, output_y_hat_optimization, include,
scoring_functions, output_y_hat_optimization, include,
exclude, disable_file_output, init_params=None, budget_type=None,
budget=None):
evaluator = TestEvaluator(configuration=config,
backend=backend, metric=metric, seed=seed,
queue=queue,
all_scoring_functions=all_scoring_functions,
scoring_functions=scoring_functions,
include=include, exclude=exclude,
disable_file_output=disable_file_output,
init_params=init_params)