Skip to content

Commit

Permalink
Feedback from PR
Browse files Browse the repository at this point in the history
  • Loading branch information
franchuterivera committed Apr 12, 2021
1 parent e5539dc commit 426c2e1
Show file tree
Hide file tree
Showing 4 changed files with 61 additions and 34 deletions.
35 changes: 25 additions & 10 deletions autosklearn/automl.py
Expand Up @@ -8,7 +8,7 @@
import os
import sys
import time
from typing import Any, Dict, Optional, List, Tuple
from typing import Any, Dict, Optional, List, Tuple, Union
import uuid
import unittest.mock
import warnings
Expand Down Expand Up @@ -950,10 +950,11 @@ def fit_pipeline(
y: SUPPORTED_TARGET_TYPES,
task: int,
is_classification: bool,
config: Configuration,
config: Union[Configuration, Dict[str, Union[str, float, int]]],
dataset_name: Optional[str] = None,
X_test: Optional[SUPPORTED_FEAT_TYPES] = None,
y_test: Optional[SUPPORTED_TARGET_TYPES] = None,
feat_type: Optional[List[str]] = None,
**kwargs: Dict,
) -> Tuple[Optional[BasePipeline], RunInfo, RunValue]:
""" Fits and individual pipeline configuration and returns
Expand All @@ -975,13 +976,21 @@ def fit_pipeline(
If provided, the testing performance will be tracked on these features.
y_test: array-like
If provided, the testing performance will be tracked on these labels
config: Configuration
A configuration object used to define a pipeline steps
config: Union[Configuration, Dict[str, Union[str, float, int]]]
A configuration object used to define the pipeline steps. If a dictionary is passed,
a configuration is created based on this dictionary.
dataset_name: Optional[str]
A string to tag and identify the Auto-Sklearn run
is_classification: bool
Whether the task is for classification or regression. This affects
how the targets are treated
feat_type : list, optional (default=None)
List of str of `len(X.shape[1])` describing the attribute type.
Possible types are `Categorical` and `Numerical`. `Categorical`
attributes will be automatically One-Hot encoded. The values
used for a categorical attribute must be integers, obtained for
example by `sklearn.preprocessing.LabelEncoder
<http://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.LabelEncoder.html>`_.
Returns
-------
Expand All @@ -999,23 +1008,25 @@ def fit_pipeline(
# dataset
if self.configuration_space is None:
self.configuration_space = self.fit(
X=X, y=y, task=task,
X=X, y=y,
dataset_name=dataset_name if dataset_name is not None else self._dataset_name,
X_test=X_test,
y_test=y_test,
feat_type=kwargs.pop('feat_type', self._feat_type),
feat_type=feat_type,
only_return_configuration_space=True)

# We do not want to overwrite existing runs
self.num_run += 1
if isinstance(config, dict):
config = Configuration(self.configuration_space, config)
config.config_id = self.num_run

# Get the components to include and exclude on the configuration space
# from the estimator attributes
include, exclude = parse_include_exclude_components(
task=self._task,
include_estimators=self._exclude_estimators,
exclude_estimators=self._include_estimators,
include_estimators=self._include_estimators,
exclude_estimators=self._exclude_estimators,
include_preprocessors=self._include_preprocessors,
exclude_preprocessors=self._exclude_preprocessors,
)
Expand Down Expand Up @@ -1591,10 +1602,11 @@ def fit_pipeline(
self,
X: SUPPORTED_FEAT_TYPES,
y: SUPPORTED_TARGET_TYPES,
config: Configuration,
config: Union[Configuration, Dict[str, Union[str, float, int]]],
dataset_name: Optional[str] = None,
X_test: Optional[SUPPORTED_FEAT_TYPES] = None,
y_test: Optional[SUPPORTED_TARGET_TYPES] = None,
feat_type: Optional[List[str]] = None,
**kwargs,
) -> Tuple[Optional[BasePipeline], RunInfo, RunValue]:
y_task = type_of_target(y)
Expand All @@ -1615,6 +1627,7 @@ def fit_pipeline(
config=config,
task=task,
is_classification=True,
feat_type=feat_type,
**kwargs,
)

Expand Down Expand Up @@ -1681,10 +1694,11 @@ def fit_pipeline(
self,
X: SUPPORTED_FEAT_TYPES,
y: SUPPORTED_TARGET_TYPES,
config: Configuration,
config: Union[Configuration, Dict[str, Union[str, float, int]]],
dataset_name: Optional[str] = None,
X_test: Optional[SUPPORTED_FEAT_TYPES] = None,
y_test: Optional[SUPPORTED_TARGET_TYPES] = None,
feat_type: Optional[List[str]] = None,
**kwargs: Dict,
) -> Tuple[Optional[BasePipeline], RunInfo, RunValue]:

Expand All @@ -1703,6 +1717,7 @@ def fit_pipeline(
X_test=X_test, y_test=y_test,
config=config,
task=task,
feat_type=feat_type,
dataset_name=dataset_name,
is_classification=False,
**kwargs,
Expand Down
54 changes: 32 additions & 22 deletions autosklearn/estimators.py
@@ -1,6 +1,6 @@
# -*- encoding: utf-8 -*-

from typing import Optional, Dict, List, Tuple
from typing import Optional, Dict, List, Tuple, Union

from ConfigSpace.configuration_space import Configuration
import dask.distributed
Expand Down Expand Up @@ -349,10 +349,11 @@ def fit_pipeline(
self,
X: SUPPORTED_FEAT_TYPES,
y: SUPPORTED_TARGET_TYPES,
config: Configuration,
config: Union[Configuration, Dict[str, Union[str, float, int]]],
dataset_name: Optional[str] = None,
X_test: Optional[SUPPORTED_FEAT_TYPES] = None,
y_test: Optional[SUPPORTED_TARGET_TYPES] = None,
feat_type: Optional[List[str]] = None,
*args,
**kwargs: Dict,
) -> Tuple[Optional[BasePipeline], RunInfo, RunValue]:
Expand All @@ -369,35 +370,44 @@ def fit_pipeline(
Parameters
----------
X: array-like, shape = (n_samples, n_features)
The features used for training
y: array-like
The labels used for training
X_test: Optional array-like, shape = (n_samples, n_features)
If provided, the testing performance will be tracked on these features.
y_test: array-like
If provided, the testing performance will be tracked on these labels
config: Configuration
A configuration object used to define a pipeline steps
dataset_name: Optional[str]
Name that will be used to tag the Auto-Sklearn run and identify the
Auto-Sklearn run
X: array-like, shape = (n_samples, n_features)
The features used for training
y: array-like
The labels used for training
        X_test: Optional array-like, shape = (n_samples, n_features)
            If provided, the testing performance will be tracked on these features.
        y_test: array-like
            If provided, the testing performance will be tracked on these labels
config: Union[Configuration, Dict[str, Union[str, float, int]]]
A configuration object used to define the pipeline steps.
If a dictionary is passed, a configuration is created based on this dictionary.
dataset_name: Optional[str]
Name that will be used to tag the Auto-Sklearn run and identify the
Auto-Sklearn run
feat_type : list, optional (default=None)
List of str of `len(X.shape[1])` describing the attribute type.
Possible types are `Categorical` and `Numerical`. `Categorical`
attributes will be automatically One-Hot encoded. The values
used for a categorical attribute must be integers, obtained for
example by `sklearn.preprocessing.LabelEncoder
<http://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.LabelEncoder.html>`_.
Returns
-------
pipeline: Optional[BasePipeline]
The fitted pipeline. In case of failure while fitting the pipeline,
a None is returned.
run_info: RunInfo
A named tuple that contains the configuration launched
run_value: RunValue
A named tuple that contains the result of the run
pipeline: Optional[BasePipeline]
The fitted pipeline. In case of failure while fitting the pipeline,
a None is returned.
        run_info: RunInfo
A named tuple that contains the configuration launched
run_value: RunValue
A named tuple that contains the result of the run
"""
if self.automl_ is None:
self.automl_ = self.build_automl()
return self.automl_.fit_pipeline(X=X, y=y,
dataset_name=dataset_name,
config=config,
feat_type=feat_type,
X_test=X_test, y_test=y_test,
*args, **kwargs)

Expand Down
4 changes: 2 additions & 2 deletions autosklearn/smbo.py
Expand Up @@ -418,8 +418,8 @@ def run_smbo(self):
# into a queue and querying them once the time is over
include, exclude = parse_include_exclude_components(
task=self.task,
include_estimators=self.exclude_estimators,
exclude_estimators=self.include_estimators,
include_estimators=self.include_estimators,
exclude_estimators=self.exclude_estimators,
include_preprocessors=self.include_preprocessors,
exclude_preprocessors=self.exclude_preprocessors,
)
Expand Down
2 changes: 2 additions & 0 deletions test/test_automl/test_estimators.py
Expand Up @@ -732,6 +732,7 @@ def test_fit_pipeline(dask_client, task_type, resampling_strategy, disable_file_
per_run_time_limit=30,
ensemble_size=0,
dask_client=dask_client,
include_estimators=['random_forest'],
seed=seed,
# We cannot get the configuration space with 'test' not fit with it
resampling_strategy=resampling_strategy if resampling_strategy != 'test' else 'holdout',
Expand Down Expand Up @@ -765,6 +766,7 @@ def test_fit_pipeline(dask_client, task_type, resampling_strategy, disable_file_
else:
# We should have fitted a pipeline with named_steps
assert hasattr(pipeline, 'named_steps')
assert 'RandomForest' in pipeline.steps[-1][-1].choice.__class__.__name__

# Num run should be 2, as 1 is for dummy classifier and we have not launch
# another pipeline
Expand Down

0 comments on commit 426c2e1

Please sign in to comment.