Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enabled pipeline fit #1096

Merged
merged 5 commits into from Apr 13, 2021
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
43 changes: 8 additions & 35 deletions autosklearn/automl.py
Expand Up @@ -55,6 +55,7 @@
get_named_client_logger,
)
from autosklearn.util import pipeline, RE_PATTERN
from autosklearn.util.pipeline import parse_include_exclude_components
from autosklearn.util.parallel import preload_modules
from autosklearn.ensemble_builder import EnsembleBuilderManager
from autosklearn.ensembles.singlebest_ensemble import SingleBest
Expand Down Expand Up @@ -1011,7 +1012,13 @@ def fit_pipeline(

# Get the components to include and exclude on the configuration space
# from the estimator attributes
include, exclude = self._get_include_exclude_pipeline_dicts()
# Forward every attribute to the keyword of the same name. The helper stores
# `include_estimators` under the include dict and `exclude_estimators` under
# the exclude dict, so crossing the two would invert the estimator selection
# configured on this object.
include, exclude = parse_include_exclude_components(
    task=self._task,
    include_estimators=self._include_estimators,
    exclude_estimators=self._exclude_estimators,
    include_preprocessors=self._include_preprocessors,
    exclude_preprocessors=self._exclude_preprocessors,
)

# Fill in any components missing from the TAE function call
if 'include' not in kwargs:
Expand Down Expand Up @@ -1075,36 +1082,6 @@ def fit_pipeline(

return pipeline, run_info, run_value

def _get_include_exclude_pipeline_dicts(self):
    """Translate the include/exclude attributes into two component dicts.

    Reads ``_include_preprocessors``, ``_exclude_preprocessors``,
    ``_include_estimators``, ``_exclude_estimators`` and ``_task`` from the
    instance.

    Returns
    -------
    tuple
        ``(include, exclude)``. Preprocessor lists are stored under the key
        ``'feature_preprocessor'``; estimator lists are stored under
        ``'classifier'`` or ``'regressor'`` depending on the task. A key is
        only present when the corresponding attribute is not ``None``.

    Raises
    ------
    ValueError
        If both the include and the exclude list are given for the
        preprocessors or for the estimators, or if estimators are given and
        the task is neither a classification nor a regression task.
    """
    include, exclude = {}, {}

    # Feature preprocessors: at most one of include/exclude may be set.
    keep_preprocessors = self._include_preprocessors
    drop_preprocessors = self._exclude_preprocessors
    if keep_preprocessors is not None:
        if drop_preprocessors is not None:
            raise ValueError('Cannot specify include_preprocessors and '
                             'exclude_preprocessors.')
        include['feature_preprocessor'] = keep_preprocessors
    elif drop_preprocessors is not None:
        exclude['feature_preprocessor'] = drop_preprocessors

    # Estimators: same exclusivity rule; nothing to do when neither is set.
    keep_estimators = self._include_estimators
    drop_estimators = self._exclude_estimators
    if keep_estimators is None and drop_estimators is None:
        return include, exclude
    if keep_estimators is not None and drop_estimators is not None:
        raise ValueError('Cannot specify include_estimators and '
                         'exclude_estimators.')

    # The pipeline step holding the estimator is named after the task family.
    if self._task in CLASSIFICATION_TASKS:
        step = 'classifier'
    elif self._task in REGRESSION_TASKS:
        step = 'regressor'
    else:
        raise ValueError(self._task)

    if keep_estimators is not None:
        include[step] = keep_estimators
    else:
        exclude[step] = drop_estimators
    return include, exclude

def predict(self, X, batch_size=None, n_jobs=1):
"""predict.

Expand Down Expand Up @@ -1586,8 +1563,6 @@ def fit(
dataset_name: Optional[str] = None,
only_return_configuration_space: bool = False,
load_models: bool = True,
task: Optional[int] = None,
is_classification: bool = True,
):
y_task = type_of_target(y)
task = self._task_mapping.get(y_task)
Expand Down Expand Up @@ -1678,8 +1653,6 @@ def fit(
dataset_name: Optional[str] = None,
only_return_configuration_space: bool = False,
load_models: bool = True,
task: Optional[int] = None,
is_classification: bool = False,
):

# Check the data provided in y
Expand Down
38 changes: 9 additions & 29 deletions autosklearn/smbo.py
Expand Up @@ -23,14 +23,14 @@
import autosklearn.metalearning
from autosklearn.constants import MULTILABEL_CLASSIFICATION, \
BINARY_CLASSIFICATION, TASK_TYPES_TO_STRING, CLASSIFICATION_TASKS, \
REGRESSION_TASKS, MULTICLASS_CLASSIFICATION, REGRESSION, \
MULTIOUTPUT_REGRESSION
MULTICLASS_CLASSIFICATION, REGRESSION, MULTIOUTPUT_REGRESSION
from autosklearn.ensemble_builder import EnsembleBuilderManager
from autosklearn.metalearning.mismbo import suggest_via_metalearning
from autosklearn.data.abstract_data_manager import AbstractDataManager
from autosklearn.evaluation import ExecuteTaFuncWithQueue, get_cost_of_crash
from autosklearn.util.logging_ import get_named_client_logger
from autosklearn.util.parallel import preload_modules
from autosklearn.util.pipeline import parse_include_exclude_components
from autosklearn.metalearning.metalearning.meta_base import MetaBase
from autosklearn.metalearning.metafeatures.metafeatures import \
calculate_all_metafeatures_with_labels, calculate_all_metafeatures_encoded_labels
Expand Down Expand Up @@ -416,33 +416,13 @@ def run_smbo(self):
# evaluator, which takes into account that a run can be killed prior
# to the model being fully fitted; thus putting intermediate results
# into a queue and querying them once the time is over
exclude = dict()
include = dict()
if self.include_preprocessors is not None and self.exclude_preprocessors is not None:
raise ValueError('Cannot specify include_preprocessors and '
'exclude_preprocessors.')
elif self.include_preprocessors is not None:
include['feature_preprocessor'] = self.include_preprocessors
elif self.exclude_preprocessors is not None:
exclude['feature_preprocessor'] = self.exclude_preprocessors

if self.include_estimators is not None and self.exclude_estimators is not None:
raise ValueError('Cannot specify include_estimators and '
'exclude_estimators.')
elif self.include_estimators is not None:
if self.task in CLASSIFICATION_TASKS:
include['classifier'] = self.include_estimators
elif self.task in REGRESSION_TASKS:
include['regressor'] = self.include_estimators
else:
raise ValueError(self.task)
elif self.exclude_estimators is not None:
if self.task in CLASSIFICATION_TASKS:
exclude['classifier'] = self.exclude_estimators
elif self.task in REGRESSION_TASKS:
exclude['regressor'] = self.exclude_estimators
else:
raise ValueError(self.task)
# Forward every attribute to the keyword of the same name. The helper stores
# `include_estimators` under the include dict and `exclude_estimators` under
# the exclude dict, so crossing the two would invert the estimator selection
# configured on this object.
include, exclude = parse_include_exclude_components(
    task=self.task,
    include_estimators=self.include_estimators,
    exclude_estimators=self.exclude_estimators,
    include_preprocessors=self.include_preprocessors,
    exclude_preprocessors=self.exclude_preprocessors,
)

ta_kwargs = dict(
backend=copy.deepcopy(self.backend),
Expand Down
43 changes: 30 additions & 13 deletions autosklearn/util/pipeline.py
@@ -1,5 +1,5 @@
# -*- encoding: utf-8 -*-
from typing import Any, Dict, List, Optional
from typing import Any, Dict, List, Optional, Tuple

from ConfigSpace.configuration_space import ConfigurationSpace

Expand All @@ -24,12 +24,13 @@
]


def get_configuration_space(info: Dict[str, Any],
include_estimators: Optional[List[str]] = None,
exclude_estimators: Optional[List[str]] = None,
include_preprocessors: Optional[List[str]] = None,
exclude_preprocessors: Optional[List[str]] = None
) -> ConfigurationSpace:
def parse_include_exclude_components(
task: int,
include_estimators: Optional[List[str]] = None,
exclude_estimators: Optional[List[str]] = None,
include_preprocessors: Optional[List[str]] = None,
exclude_preprocessors: Optional[List[str]] = None
) -> Tuple[Dict[str, List[str]], Dict[str, List[str]]]:
exclude = dict()
include = dict()
if include_preprocessors is not None and \
Expand All @@ -46,19 +47,35 @@ def get_configuration_space(info: Dict[str, Any],
raise ValueError('Cannot specify include_estimators and '
'exclude_estimators.')
elif include_estimators is not None:
if info['task'] in CLASSIFICATION_TASKS:
if task in CLASSIFICATION_TASKS:
include['classifier'] = include_estimators
elif info['task'] in REGRESSION_TASKS:
elif task in REGRESSION_TASKS:
include['regressor'] = include_estimators
else:
raise ValueError(info['task'])
raise ValueError(task)
elif exclude_estimators is not None:
if info['task'] in CLASSIFICATION_TASKS:
if task in CLASSIFICATION_TASKS:
exclude['classifier'] = exclude_estimators
elif info['task'] in REGRESSION_TASKS:
elif task in REGRESSION_TASKS:
exclude['regressor'] = exclude_estimators
else:
raise ValueError(info['task'])
raise ValueError(task)
return include, exclude


def get_configuration_space(info: Dict[str, Any],
include_estimators: Optional[List[str]] = None,
exclude_estimators: Optional[List[str]] = None,
include_preprocessors: Optional[List[str]] = None,
exclude_preprocessors: Optional[List[str]] = None
) -> ConfigurationSpace:
include, exclude = parse_include_exclude_components(
task=info['task'],
include_estimators=include_estimators,
exclude_estimators=exclude_estimators,
include_preprocessors=include_preprocessors,
exclude_preprocessors=exclude_preprocessors,
)

if info['task'] in REGRESSION_TASKS:
return _get_regression_configuration_space(info, include, exclude)
Expand Down