1 change: 0 additions & 1 deletion .github/workflows/examples.yml
@@ -34,5 +34,4 @@ jobs:
           python examples/tabular/20_basics/example_tabular_regression.py
           python examples/tabular/40_advanced/example_custom_configuration_space.py
           python examples/tabular/40_advanced/example_resampling_strategy.py
-          python examples/tabular/40_advanced/example_single_configuration.py
           python examples/example_image_classification.py
2 changes: 1 addition & 1 deletion .github/workflows/pytest.yml
@@ -29,7 +29,7 @@ jobs:
       - name: Run tests
         run: |
           if [ ${{ matrix.code-cov }} ]; then codecov='--cov=autoPyTorch --cov-report=xml'; fi
-          python -m pytest --forked --durations=20 --timeout=600 --timeout-method=signal -v $codecov test
+          python -m pytest --durations=20 --timeout=600 --timeout-method=signal -v $codecov test
       - name: Check for files left behind by test
         if: ${{ always() }}
         run: |
314 changes: 64 additions & 250 deletions autoPyTorch/api/base_task.py

Large diffs are not rendered by default.

94 changes: 30 additions & 64 deletions autoPyTorch/api/tabular_classification.py
@@ -108,67 +108,16 @@ def _get_required_dataset_properties(self, dataset: BaseDataset) -> Dict[str, Any]:
             'numerical_columns': dataset.numerical_columns,
             'categorical_columns': dataset.categorical_columns}
 
-    def build_pipeline(self, dataset_properties: Dict[str, Any],
-                       include_components: Optional[Dict] = None,
-                       exclude_components: Optional[Dict] = None,
-                       search_space_updates: Optional[HyperparameterSearchSpaceUpdates] = None
-                       ) -> TabularClassificationPipeline:
-        return TabularClassificationPipeline(dataset_properties=dataset_properties,
-                                             include=include_components,
-                                             exclude=exclude_components,
-                                             search_space_updates=search_space_updates)
-
-    def get_dataset(self,
-                    X_train: Union[List, pd.DataFrame, np.ndarray],
-                    y_train: Union[List, pd.DataFrame, np.ndarray],
-                    X_test: Union[List, pd.DataFrame, np.ndarray],
-                    y_test: Union[List, pd.DataFrame, np.ndarray],
-                    resampling_strategy: Optional[Union[CrossValTypes, HoldoutValTypes]] = None,
-                    resampling_strategy_args: Optional[Dict[str, Any]] = None,
-                    dataset_name: Optional[str] = None,
-                    return_only: Optional[bool] = False
-                    ) -> BaseDataset:
-
-        if dataset_name is None:
-            dataset_name = str(uuid.uuid1(clock_seq=os.getpid()))
-
-        resampling_strategy = resampling_strategy if resampling_strategy is not None else self.resampling_strategy
-        resampling_strategy_args = resampling_strategy_args if resampling_strategy_args is not None else \
-            self.resampling_strategy_args
-
-        # Create a validator object to make sure that the data provided by
-        # the user matches the autopytorch requirements
-        InputValidator = TabularInputValidator(
-            is_classification=True,
-            logger_port=self._logger_port,
-        )
-
-        # Fit a input validator to check the provided data
-        # Also, an encoder is fit to both train and test data,
-        # to prevent unseen categories during inference
-        InputValidator.fit(X_train=X_train, y_train=y_train, X_test=X_test, y_test=y_test)
-
-        dataset = TabularDataset(
-            X=X_train, Y=y_train,
-            X_test=X_test, Y_test=y_test,
-            validator=InputValidator,
-            resampling_strategy=resampling_strategy,
-            resampling_strategy_args=resampling_strategy_args,
-            dataset_name=dataset_name
-        )
-        if not return_only:
-            self.InputValidator = InputValidator
-            self.dataset = dataset
-
-        return dataset
+    def build_pipeline(self, dataset_properties: Dict[str, Any]) -> TabularClassificationPipeline:
+        return TabularClassificationPipeline(dataset_properties=dataset_properties)
 
     def search(
         self,
         optimize_metric: str,
-        X_train: Union[List, pd.DataFrame, np.ndarray],
-        y_train: Union[List, pd.DataFrame, np.ndarray],
-        X_test: Union[List, pd.DataFrame, np.ndarray],
-        y_test: Union[List, pd.DataFrame, np.ndarray],
+        X_train: Optional[Union[List, pd.DataFrame, np.ndarray]] = None,
+        y_train: Optional[Union[List, pd.DataFrame, np.ndarray]] = None,
+        X_test: Optional[Union[List, pd.DataFrame, np.ndarray]] = None,
+        y_test: Optional[Union[List, pd.DataFrame, np.ndarray]] = None,
         dataset_name: Optional[str] = None,
         budget_type: Optional[str] = None,
         budget: Optional[float] = None,
@@ -194,8 +143,6 @@ def search(
                A pair of features (X_train) and targets (y_train) used to fit a
                pipeline. Additionally, a holdout of this pairs (X_test, y_test) can
                be provided to track the generalization performance of each stage.
-            dataset_name (Optional[str]):
-                Name of the dayaset, if None, random value is used
            optimize_metric (str): name of the metric that is used to
                evaluate a pipeline.
            budget_type (Optional[str]):
@@ -257,12 +204,31 @@ def search(
             self
 
         """
+        if dataset_name is None:
+            dataset_name = str(uuid.uuid1(clock_seq=os.getpid()))
 
-        self.get_dataset(X_train=X_train,
-                         y_train=y_train,
-                         X_test=X_test,
-                         y_test=y_test,
-                         dataset_name=dataset_name)
+        # we have to create a logger at this point for the validator
+        self._logger = self._get_logger(dataset_name)
+
+        # Create a validator object to make sure that the data provided by
+        # the user matches the autopytorch requirements
+        self.InputValidator = TabularInputValidator(
+            is_classification=True,
+            logger_port=self._logger_port,
+        )
+
+        # Fit an input validator to check the provided data.
+        # Also, an encoder is fit to both train and test data,
+        # to prevent unseen categories during inference
+        self.InputValidator.fit(X_train=X_train, y_train=y_train, X_test=X_test, y_test=y_test)
+
+        self.dataset = TabularDataset(
+            X=X_train, Y=y_train,
+            X_test=X_test, Y_test=y_test,
+            validator=self.InputValidator,
+            resampling_strategy=self.resampling_strategy,
+            resampling_strategy_args=self.resampling_strategy_args,
+        )
 
         return self._search(
             dataset=self.dataset,
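Taken together, the classification changes fold get_dataset into search(): the logger, input validator, and TabularDataset are now created inside search() itself, and the training-data arguments became Optional. A minimal usage sketch of the resulting call pattern follows; the TabularClassificationTask class name, the 'accuracy' metric name, and the total_walltime_limit parameter are assumptions drawn from the autoPyTorch API, not shown in this diff.

import numpy as np
from autoPyTorch.api.tabular_classification import TabularClassificationTask

# Synthetic toy data; search() now validates it and builds the
# TabularDataset internally, so no prior get_dataset() call is needed.
X_train = np.random.rand(200, 4)
y_train = np.random.randint(0, 2, size=200)

api = TabularClassificationTask()
api.search(
    optimize_metric='accuracy',    # metric name assumed from the API
    X_train=X_train,
    y_train=y_train,
    # X_test / y_test are Optional and only track generalization
    total_walltime_limit=60,       # parameter name assumed from the API
)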
86 changes: 26 additions & 60 deletions autoPyTorch/api/tabular_regression.py
@@ -100,59 +100,8 @@ def _get_required_dataset_properties(self, dataset: BaseDataset) -> Dict[str, Any]:
             'numerical_columns': dataset.numerical_columns,
             'categorical_columns': dataset.categorical_columns}
 
-    def build_pipeline(self, dataset_properties: Dict[str, Any],
-                       include_components: Optional[Dict] = None,
-                       exclude_components: Optional[Dict] = None,
-                       search_space_updates: Optional[HyperparameterSearchSpaceUpdates] = None
-                       ) -> TabularRegressionPipeline:
-        return TabularRegressionPipeline(dataset_properties=dataset_properties,
-                                         include=include_components,
-                                         exclude=exclude_components,
-                                         search_space_updates=search_space_updates)
-
-    def get_dataset(self,
-                    X_train: Union[List, pd.DataFrame, np.ndarray],
-                    y_train: Union[List, pd.DataFrame, np.ndarray],
-                    X_test: Union[List, pd.DataFrame, np.ndarray],
-                    y_test: Union[List, pd.DataFrame, np.ndarray],
-                    resampling_strategy: Optional[Union[CrossValTypes, HoldoutValTypes]] = None,
-                    resampling_strategy_args: Optional[Dict[str, Any]] = None,
-                    dataset_name: Optional[str] = None,
-                    return_only: Optional[bool] = False
-                    ) -> BaseDataset:
-
-        if dataset_name is None:
-            dataset_name = str(uuid.uuid1(clock_seq=os.getpid()))
-
-        resampling_strategy = resampling_strategy if resampling_strategy is not None else self.resampling_strategy
-        resampling_strategy_args = resampling_strategy_args if resampling_strategy_args is not None else \
-            self.resampling_strategy_args
-
-        # Create a validator object to make sure that the data provided by
-        # the user matches the autopytorch requirements
-        InputValidator = TabularInputValidator(
-            is_classification=False,
-            logger_port=self._logger_port,
-        )
-
-        # Fit a input validator to check the provided data
-        # Also, an encoder is fit to both train and test data,
-        # to prevent unseen categories during inference
-        InputValidator.fit(X_train=X_train, y_train=y_train, X_test=X_test, y_test=y_test)
-
-        dataset = TabularDataset(
-            X=X_train, Y=y_train,
-            X_test=X_test, Y_test=y_test,
-            validator=InputValidator,
-            resampling_strategy=resampling_strategy,
-            resampling_strategy_args=resampling_strategy_args,
-            dataset_name=dataset_name
-        )
-        if not return_only:
-            self.InputValidator = InputValidator
-            self.dataset = dataset
-
-        return dataset
+    def build_pipeline(self, dataset_properties: Dict[str, Any]) -> TabularRegressionPipeline:
+        return TabularRegressionPipeline(dataset_properties=dataset_properties)
 
     def search(
         self,
@@ -243,14 +192,31 @@ def search(
             self
 
         """
+        if dataset_name is None:
+            dataset_name = str(uuid.uuid1(clock_seq=os.getpid()))
 
-        self.get_dataset(X_train=X_train,
-                         y_train=y_train,
-                         X_test=X_test,
-                         y_test=y_test,
-                         resampling_strategy=self.resampling_strategy,
-                         resampling_strategy_args=self.resampling_strategy_args,
-                         dataset_name=dataset_name)
+        # we have to create a logger at this point for the validator
+        self._logger = self._get_logger(dataset_name)
+
+        # Create a validator object to make sure that the data provided by
+        # the user matches the autopytorch requirements
+        self.InputValidator = TabularInputValidator(
+            is_classification=False,
+            logger_port=self._logger_port,
+        )
+
+        # Fit an input validator to check the provided data.
+        # Also, an encoder is fit to both train and test data,
+        # to prevent unseen categories during inference
+        self.InputValidator.fit(X_train=X_train, y_train=y_train, X_test=X_test, y_test=y_test)
+
+        self.dataset = TabularDataset(
+            X=X_train, Y=y_train,
+            X_test=X_test, Y_test=y_test,
+            validator=self.InputValidator,
+            resampling_strategy=self.resampling_strategy,
+            resampling_strategy_args=self.resampling_strategy_args,
+        )
 
         return self._search(
             dataset=self.dataset,
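The regression task mirrors the same refactoring, with is_classification=False inside search(). A sketch under the same assumptions (TabularRegressionTask and the 'r2' metric name are not shown in this diff):

import numpy as np
from autoPyTorch.api.tabular_regression import TabularRegressionTask

# Same flow as classification, but with continuous targets.
X_train = np.random.rand(200, 4)
y_train = np.random.rand(200)

api = TabularRegressionTask()
api.search(
    optimize_metric='r2',          # metric name assumed from the API
    X_train=X_train,
    y_train=y_train,
    total_walltime_limit=60,       # parameter name assumed from the API
)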
2 changes: 1 addition & 1 deletion autoPyTorch/evaluation/tae.py
@@ -107,7 +107,7 @@ def __init__(
         include: typing.Optional[typing.Dict[str, typing.Any]] = None,
         exclude: typing.Optional[typing.Dict[str, typing.Any]] = None,
         memory_limit: typing.Optional[int] = None,
-        disable_file_output: typing.Union[bool, typing.List] = False,
+        disable_file_output: bool = False,
         init_params: typing.Dict[str, typing.Any] = None,
         budget_type: str = None,
         ta: typing.Optional[typing.Callable] = None,
2 changes: 1 addition & 1 deletion autoPyTorch/optimizer/smbo.py
@@ -97,7 +97,7 @@ def __init__(self,
              resampling_strategy_args: typing.Optional[typing.Dict[str, typing.Any]] = None,
              include: typing.Optional[typing.Dict[str, typing.Any]] = None,
              exclude: typing.Optional[typing.Dict[str, typing.Any]] = None,
-             disable_file_output: typing.Union[bool, typing.List] = [],
+             disable_file_output: typing.List = [],
              smac_scenario_args: typing.Optional[typing.Dict[str, typing.Any]] = None,
              get_smac_object_callback: typing.Optional[typing.Callable] = None,
              all_supported_metrics: bool = True,
85 changes: 0 additions & 85 deletions examples/tabular/40_advanced/example_single_configuration.py

This file was deleted.

3 changes: 1 addition & 2 deletions setup.py
@@ -48,8 +48,7 @@
         "codecov",
         "pep8",
         "mypy",
-        "openml",
-        "pytest-forked"
+        "openml"
     ],
     "examples": [
         "matplotlib",