Skip to content

Commit

Permalink
Merge branch 'development' into add-col_tfr
Browse files Browse the repository at this point in the history
  • Loading branch information
ravinkohli committed Oct 26, 2021
2 parents 7288128 + 9002937 commit f1e837d
Show file tree
Hide file tree
Showing 32 changed files with 709 additions and 273 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/pytest.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: [3.7, 3.8]
python-version: [3.7, 3.8, 3.9]
include:
- python-version: 3.8
code-cov: true
Expand Down
155 changes: 89 additions & 66 deletions autoPyTorch/api/base_task.py

Large diffs are not rendered by default.

47 changes: 36 additions & 11 deletions autoPyTorch/api/tabular_classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,8 +110,9 @@ def search(
X_test: Optional[Union[List, pd.DataFrame, np.ndarray]] = None,
y_test: Optional[Union[List, pd.DataFrame, np.ndarray]] = None,
dataset_name: Optional[str] = None,
budget_type: Optional[str] = None,
budget: Optional[float] = None,
budget_type: str = 'epochs',
min_budget: int = 5,
max_budget: int = 50,
total_walltime_limit: int = 100,
func_eval_time_limit_secs: Optional[int] = None,
enable_traditional_pipeline: bool = True,
Expand All @@ -137,15 +138,38 @@ def search(
be provided to track the generalization performance of each stage.
optimize_metric (str):
name of the metric that is used to evaluate a pipeline.
budget_type (Optional[str]):
budget_type (str):
Type of budget to be used when fitting the pipeline.
Either 'epochs' or 'runtime'. If not provided, uses
the default in the pipeline config ('epochs')
budget (Optional[float]):
Budget to fit a single run of the pipeline. If not
provided, uses the default in the pipeline config
total_walltime_limit (int), (default=100):
Time limit in seconds for the search of appropriate models.
It can be one of:
+ 'epochs': The training of each pipeline will be terminated after
a number of epochs have passed. This number of epochs is determined by the
min_budget/max_budget arguments of this method.
+ 'runtime': The training of each pipeline will be terminated after
a number of seconds have passed. This number of seconds is determined by the
min_budget/max_budget arguments of this method. The overall fitting time of a pipeline is
controlled by func_eval_time_limit_secs. 'runtime' only controls the allocated
time to train a pipeline, but it does not consider the overall time it takes
to create a pipeline (data loading and preprocessing, other i/o operations, etc.).
budget_type will determine the units of min_budget/max_budget. If budget_type=='epochs'
is used, min_budget will refer to epochs whereas if budget_type=='runtime' then
min_budget will refer to seconds.
min_budget (int):
Auto-PyTorch uses `Hyperband <https://arxiv.org/abs/1603.06560>`_ to
trade-off resources between running many pipelines at min_budget and
running the top performing pipelines on max_budget.
min_budget states the minimum resource allocation a pipeline should have
so that we can compare and quickly discard bad performing models.
For example, if the budget_type is epochs, and min_budget=5, then we will
run every pipeline to a minimum of 5 epochs before performance comparison.
max_budget (int):
Auto-PyTorch uses `Hyperband <https://arxiv.org/abs/1603.06560>`_ to
trade-off resources between running many pipelines at min_budget and
running the top performing pipelines on max_budget.
max_budget states the maximum resource allocation a pipeline is going to
be run. For example, if the budget_type is epochs, and max_budget=50,
then the pipeline training will be terminated after 50 epochs.
total_walltime_limit (int), (default=100): Time limit
in seconds for the search of appropriate models.
By increasing this value, autopytorch has a higher
chance of finding better models.
func_eval_time_limit_secs (int), (default=None):
Expand Down Expand Up @@ -234,7 +258,8 @@ def search(
dataset=self.dataset,
optimize_metric=optimize_metric,
budget_type=budget_type,
budget=budget,
min_budget=min_budget,
max_budget=max_budget,
total_walltime_limit=total_walltime_limit,
func_eval_time_limit_secs=func_eval_time_limit_secs,
enable_traditional_pipeline=enable_traditional_pipeline,
Expand Down
77 changes: 51 additions & 26 deletions autoPyTorch/api/tabular_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,23 +53,23 @@ class TabularRegressionTask(BaseTask):
"""

def __init__(
self,
seed: int = 1,
n_jobs: int = 1,
logging_config: Optional[Dict] = None,
ensemble_size: int = 50,
ensemble_nbest: int = 50,
max_models_on_disc: int = 50,
temporary_directory: Optional[str] = None,
output_directory: Optional[str] = None,
delete_tmp_folder_after_terminate: bool = True,
delete_output_folder_after_terminate: bool = True,
include_components: Optional[Dict] = None,
exclude_components: Optional[Dict] = None,
resampling_strategy: Union[CrossValTypes, HoldoutValTypes] = HoldoutValTypes.holdout_validation,
resampling_strategy_args: Optional[Dict[str, Any]] = None,
backend: Optional[Backend] = None,
search_space_updates: Optional[HyperparameterSearchSpaceUpdates] = None
self,
seed: int = 1,
n_jobs: int = 1,
logging_config: Optional[Dict] = None,
ensemble_size: int = 50,
ensemble_nbest: int = 50,
max_models_on_disc: int = 50,
temporary_directory: Optional[str] = None,
output_directory: Optional[str] = None,
delete_tmp_folder_after_terminate: bool = True,
delete_output_folder_after_terminate: bool = True,
include_components: Optional[Dict] = None,
exclude_components: Optional[Dict] = None,
resampling_strategy: Union[CrossValTypes, HoldoutValTypes] = HoldoutValTypes.holdout_validation,
resampling_strategy_args: Optional[Dict[str, Any]] = None,
backend: Optional[Backend] = None,
search_space_updates: Optional[HyperparameterSearchSpaceUpdates] = None
):
super().__init__(
seed=seed,
Expand Down Expand Up @@ -102,8 +102,9 @@ def search(
X_test: Optional[Union[List, pd.DataFrame, np.ndarray]] = None,
y_test: Optional[Union[List, pd.DataFrame, np.ndarray]] = None,
dataset_name: Optional[str] = None,
budget_type: Optional[str] = None,
budget: Optional[float] = None,
budget_type: str = 'epochs',
min_budget: int = 5,
max_budget: int = 50,
total_walltime_limit: int = 100,
func_eval_time_limit_secs: Optional[int] = None,
enable_traditional_pipeline: bool = True,
Expand All @@ -129,13 +130,36 @@ def search(
be provided to track the generalization performance of each stage.
optimize_metric (str): name of the metric that is used to
evaluate a pipeline.
budget_type (Optional[str]):
budget_type (str):
Type of budget to be used when fitting the pipeline.
Either 'epochs' or 'runtime'. If not provided, uses
the default in the pipeline config ('epochs')
budget (Optional[float]):
Budget to fit a single run of the pipeline. If not
provided, uses the default in the pipeline config
It can be one of:
+ 'epochs': The training of each pipeline will be terminated after
a number of epochs have passed. This number of epochs is determined by the
min_budget/max_budget arguments of this method.
+ 'runtime': The training of each pipeline will be terminated after
a number of seconds have passed. This number of seconds is determined by the
min_budget/max_budget arguments of this method. The overall fitting time of a pipeline is
controlled by func_eval_time_limit_secs. 'runtime' only controls the allocated
time to train a pipeline, but it does not consider the overall time it takes
to create a pipeline (data loading and preprocessing, other i/o operations, etc.).
budget_type will determine the units of min_budget/max_budget. If budget_type=='epochs'
is used, min_budget will refer to epochs whereas if budget_type=='runtime' then
min_budget will refer to seconds.
min_budget (int):
Auto-PyTorch uses `Hyperband <https://arxiv.org/abs/1603.06560>`_ to
trade-off resources between running many pipelines at min_budget and
running the top performing pipelines on max_budget.
min_budget states the minimum resource allocation a pipeline should have
so that we can compare and quickly discard bad performing models.
For example, if the budget_type is epochs, and min_budget=5, then we will
run every pipeline to a minimum of 5 epochs before performance comparison.
max_budget (int):
Auto-PyTorch uses `Hyperband <https://arxiv.org/abs/1603.06560>`_ to
trade-off resources between running many pipelines at min_budget and
running the top performing pipelines on max_budget.
max_budget states the maximum resource allocation a pipeline is going to
be run. For example, if the budget_type is epochs, and max_budget=50,
then the pipeline training will be terminated after 50 epochs.
total_walltime_limit (int), (default=100): Time limit
in seconds for the search of appropriate models.
By increasing this value, autopytorch has a higher
Expand Down Expand Up @@ -227,7 +251,8 @@ def search(
dataset=self.dataset,
optimize_metric=optimize_metric,
budget_type=budget_type,
budget=budget,
min_budget=min_budget,
max_budget=max_budget,
total_walltime_limit=total_walltime_limit,
func_eval_time_limit_secs=func_eval_time_limit_secs,
enable_traditional_pipeline=enable_traditional_pipeline,
Expand Down
17 changes: 8 additions & 9 deletions autoPyTorch/configs/default_pipeline_options.json
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
{
"device": "cpu",
"budget_type": "epochs",
"min_epochs": 5,
"epochs": 50,
"runtime": 3600,
"torch_num_threads": 1,
"early_stopping": 20,
"use_tensorboard_logger": "False",
"metrics_during_training": "True"
"device": "cpu",
"budget_type": "epochs",
"epochs": 50,
"runtime": 3600,
"torch_num_threads": 1,
"early_stopping": 20,
"use_tensorboard_logger": "False",
"metrics_during_training": "True"
}
10 changes: 8 additions & 2 deletions autoPyTorch/evaluation/abstract_evaluator.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@
calculate_loss,
get_metrics,
)
from autoPyTorch.utils.common import subsampler
from autoPyTorch.utils.common import dict_repr, subsampler
from autoPyTorch.utils.hyperparameter_search_space_update import HyperparameterSearchSpaceUpdates
from autoPyTorch.utils.logging_ import PicklableClientLogger, get_named_client_logger
from autoPyTorch.utils.pipeline import get_dataset_requirements
Expand Down Expand Up @@ -515,6 +515,12 @@ def __init__(self, backend: Backend,
# If the budget is epochs, we want to limit that in the fit dictionary
if self.budget_type == 'epochs':
self.fit_dictionary['epochs'] = budget
self.fit_dictionary.pop('runtime', None)
elif self.budget_type == 'runtime':
self.fit_dictionary['runtime'] = budget
self.fit_dictionary.pop('epochs', None)
else:
raise ValueError(f"Unsupported budget type {self.budget_type} provided")

self.num_run = 0 if num_run is None else num_run

Expand All @@ -531,7 +537,7 @@ def __init__(self, backend: Backend,
self.Y_actual_train: Optional[np.ndarray] = None
self.pipelines: Optional[List[BaseEstimator]] = None
self.pipeline: Optional[BaseEstimator] = None
self.logger.debug("Fit dictionary in Abstract evaluator: {}".format(self.fit_dictionary))
self.logger.debug("Fit dictionary in Abstract evaluator: {}".format(dict_repr(self.fit_dictionary)))
self.logger.debug("Search space updates :{}".format(self.search_space_updates))

def _get_pipeline(self) -> BaseEstimator:
Expand Down
24 changes: 18 additions & 6 deletions autoPyTorch/evaluation/tae.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
from autoPyTorch.automl_common.common.utils.backend import Backend
from autoPyTorch.evaluation.utils import empty_queue, extract_learning_curve, read_queue
from autoPyTorch.pipeline.components.training.metrics.base import autoPyTorchMetric
from autoPyTorch.utils.common import replace_string_bool_to_bool
from autoPyTorch.utils.common import dict_repr, replace_string_bool_to_bool
from autoPyTorch.utils.hyperparameter_search_space_update import HyperparameterSearchSpaceUpdates
from autoPyTorch.utils.logging_ import PicklableClientLogger, get_named_client_logger
from autoPyTorch.utils.parallel import preload_modules
Expand Down Expand Up @@ -209,9 +209,14 @@ def run_wrapper(
)
else:
if run_info.budget == 0:
run_info = run_info._replace(budget=self.pipeline_config[self.budget_type])
elif run_info.budget <= 0 or run_info.budget > 100:
raise ValueError('Illegal value for budget, must be >0 and <=100, but is %f' %
# SMAC can return budget zero for intensifiers that don't have a concept
# of budget, for example a simple bayesian optimization intensifier.
# Budget determines how our pipeline trains, which can be via runtime or epochs
epochs_budget = self.pipeline_config.get('epochs', np.inf)
runtime_budget = self.pipeline_config.get('runtime', np.inf)
run_info = run_info._replace(budget=min(epochs_budget, runtime_budget))
elif run_info.budget <= 0:
raise ValueError('Illegal value for budget, must be greater than zero but is %f' %
run_info.budget)
if self.budget_type not in ('epochs', 'runtime'):
raise ValueError("Illegal value for budget type, must be one of "
Expand Down Expand Up @@ -454,7 +459,14 @@ def run(

empty_queue(queue)
self.logger.debug(
'Finished function evaluation %s. Status: %s, Cost: %f, Runtime: %f, Additional %s',
str(num_run), status, cost, runtime, additional_run_info,
"Finish function evaluation {}.\n"
"Status: {}, Cost: {}, Runtime: {},\n"
"Additional information:\n{}".format(
str(num_run),
status,
cost,
runtime,
dict_repr(additional_run_info)
)
)
return status, cost, runtime, additional_run_info
12 changes: 6 additions & 6 deletions autoPyTorch/evaluation/train_evaluator.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
fit_and_suppress_warnings
)
from autoPyTorch.pipeline.components.training.metrics.base import autoPyTorchMetric
from autoPyTorch.utils.common import subsampler
from autoPyTorch.utils.common import dict_repr, subsampler
from autoPyTorch.utils.hyperparameter_search_space_update import HyperparameterSearchSpaceUpdates

__all__ = ['TrainEvaluator', 'eval_function']
Expand Down Expand Up @@ -172,11 +172,11 @@ def fit_predict_and_loss(self) -> None:

status = StatusType.SUCCESS

self.logger.debug("In train evaluator fit_predict_and_loss, num_run: {} loss:{},"
" additional run info:{}, status: {}".format(self.num_run,
loss,
additional_run_info,
status))
self.logger.debug("In train evaluator.fit_predict_and_loss, num_run: {} loss:{},"
" status: {},\nadditional run info:\n{}".format(self.num_run,
loss,
dict_repr(additional_run_info),
status))
self.finish_up(
loss=loss,
train_loss=train_loss,
Expand Down

0 comments on commit f1e837d

Please sign in to comment.