Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[feature] Greedy Portfolio #200

Merged
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
21 changes: 19 additions & 2 deletions autoPyTorch/api/base_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,9 @@ class BaseTask:
exclude_components (Optional[Dict]): If None, all possible components are used.
Otherwise specifies set of components not to use. Incompatible with include
components
search_space_updates (Optional[HyperparameterSearchSpaceUpdates]):
search space updates that can be used to modify the search
space of particular components or choice modules of the pipeline
"""

def __init__(
Expand Down Expand Up @@ -702,6 +705,8 @@ def _search(
precision: int = 32,
disable_file_output: List = [],
load_models: bool = True,
portfolio_selection: str = "none",
portfolio_path: Optional[str] = None
) -> 'BaseTask':
"""
Search for the best pipeline configuration for the given dataset.
Expand Down Expand Up @@ -772,7 +777,14 @@ def _search(
disable_file_output (Union[bool, List]):
load_models (bool), (default=True): Whether to load the
models after fitting AutoPyTorch.

portfolio_selection (str), (default="none"): If "greedy",
runs initial configurations present in portfolio_path if it is
specified; otherwise, runs those in
'autoPyTorch/configs/greedy_portfolio.json'.
These configurations are the best performing configurations
when search was performed on meta training datasets.
For more info refer to `AutoPyTorch Tabular <https://arxiv.org/abs/2006.13799>`_
portfolio_path (Optional[str]):
Optional argument to specify path to a portfolio file.
Returns:
self

Expand All @@ -781,6 +793,9 @@ def _search(
raise ValueError("Incompatible dataset entered for current task,"
"expected dataset to have task type :{} got "
":{}".format(self.task_type, dataset.task_type))
if portfolio_selection not in ["none", "greedy"]:
ravinkohli marked this conversation as resolved.
Show resolved Hide resolved
raise ValueError("Expected portfolio_selection to be in ['none', 'greedy']"
"got {}".format(portfolio_selection))

# Initialise information needed for the experiment
experiment_task_name: str = 'runSearch'
Expand Down Expand Up @@ -957,7 +972,9 @@ def _search(
# We do not increase the num_run here, this is something
# smac does internally
start_num_run=self._backend.get_next_num_run(peek=True),
search_space_updates=self.search_space_updates
search_space_updates=self.search_space_updates,
portfolio_selection=portfolio_selection,
portfolio_path=portfolio_path
)
try:
run_history, self.trajectory, budget_type = \
Expand Down
49 changes: 32 additions & 17 deletions autoPyTorch/api/tabular_classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,9 @@ class TabularClassificationTask(BaseTask):
If None, all possible components are used. Otherwise
specifies set of components not to use. Incompatible
with include components
search_space_updates (Optional[HyperparameterSearchSpaceUpdates]):
search space updates that can be used to modify the search
space of particular components or choice modules of the pipeline
"""
def __init__(
self,
Expand Down Expand Up @@ -131,6 +134,8 @@ def search(
precision: int = 32,
disable_file_output: List = [],
load_models: bool = True,
portfolio_selection: str = "none",
portfolio_path: Optional[str] = None
) -> 'BaseTask':
"""
Search for the best pipeline configuration for the given dataset.
Expand All @@ -143,21 +148,21 @@ def search(
A pair of features (X_train) and targets (y_train) used to fit a
pipeline. Additionally, a holdout of these pairs (X_test, y_test) can
be provided to track the generalization performance of each stage.
optimize_metric (str): name of the metric that is used to
evaluate a pipeline.
optimize_metric (str):
name of the metric that is used to evaluate a pipeline.
budget_type (Optional[str]):
Type of budget to be used when fitting the pipeline.
Either 'epochs' or 'runtime'. If not provided, uses
the default in the pipeline config ('epochs')
budget (Optional[float]):
Budget to fit a single run of the pipeline. If not
provided, uses the default in the pipeline config
total_walltime_limit (int), (default=100): Time limit
in seconds for the search of appropriate models.
total_walltime_limit (int), (default=100):
Time limit in seconds for the search of appropriate models.
By increasing this value, autopytorch has a higher
chance of finding better models.
func_eval_time_limit_secs (int), (default=None): Time limit
for a single call to the machine learning model.
func_eval_time_limit_secs (int), (default=None):
Time limit for a single call to the machine learning model.
Model fitting will be terminated if the machine
learning algorithm runs over the time limit. Set
this value high enough so that typical machine
Expand All @@ -174,32 +179,40 @@ def search(
feature by turning this flag to False. All machine learning
algorithms that are fitted during search() are considered for
ensemble building.
memory_limit (Optional[int]), (default=4096): Memory
limit in MB for the machine learning algorithm. autopytorch
memory_limit (Optional[int]), (default=4096):
Memory limit in MB for the machine learning algorithm. autopytorch
will stop fitting the machine learning algorithm if it tries
to allocate more than memory_limit MB. If None is provided,
no memory limit is set. In case of multi-processing, memory_limit
will be per job. This memory limit also applies to the ensemble
creation process.
smac_scenario_args (Optional[Dict]): Additional arguments inserted
into the scenario of SMAC. See the
smac_scenario_args (Optional[Dict]):
Additional arguments inserted into the scenario of SMAC. See the
[SMAC documentation](https://automl.github.io/SMAC3/master/options.html?highlight=scenario#scenario)
get_smac_object_callback (Optional[Callable]): Callback function
to create an object of class
get_smac_object_callback (Optional[Callable]):
Callback function to create an object of class
[smac.optimizer.smbo.SMBO](https://automl.github.io/SMAC3/master/apidoc/smac.optimizer.smbo.html).
The function must accept the arguments scenario_dict,
instances, num_params, runhistory, seed and ta. This is
an advanced feature. Use only if you are familiar with
[SMAC](https://automl.github.io/SMAC3/master/index.html).
all_supported_metrics (bool), (default=True): if True, all
metrics supporting current task will be calculated
all_supported_metrics (bool), (default=True):
if True, all metrics supporting current task will be calculated
for each pipeline and results will be available via cv_results
precision (int), (default=32): Numeric precision used when loading
ensemble data. Can be either '16', '32' or '64'.
disable_file_output (Union[bool, List]):
load_models (bool), (default=True): Whether to load the
models after fitting AutoPyTorch.

load_models (bool), (default=True):
Whether to load the models after fitting AutoPyTorch.
portfolio_selection (str), (default="none"):
If "greedy", runs initial configurations present in
portfolio_path if it is specified; otherwise, runs those in
'autoPyTorch/configs/greedy_portfolio.json'.
These configurations are the best performing configurations
when search was performed on meta training datasets.
For more info refer to `AutoPyTorch Tabular <https://arxiv.org/abs/2006.13799>`_
portfolio_path (Optional[str]):
Optional argument to specify path to a portfolio file.
Returns:
self

Expand Down Expand Up @@ -245,6 +258,8 @@ def search(
precision=precision,
disable_file_output=disable_file_output,
load_models=load_models,
portfolio_selection=portfolio_selection,
portfolio_path=portfolio_path
)

def predict(
Expand Down
15 changes: 15 additions & 0 deletions autoPyTorch/api/tabular_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,9 @@ class TabularRegressionTask(BaseTask):
exclude_components (Optional[Dict]): If None, all possible components are used.
Otherwise specifies set of components not to use. Incompatible with include
components
search_space_updates (Optional[HyperparameterSearchSpaceUpdates]):
search space updates that can be used to modify the search
space of particular components or choice modules of the pipeline
"""

def __init__(
Expand Down Expand Up @@ -123,6 +126,8 @@ def search(
precision: int = 32,
disable_file_output: List = [],
load_models: bool = True,
portfolio_selection: str = "none",
portfolio_path: Optional[str] = None
) -> 'BaseTask':
"""
Search for the best pipeline configuration for the given dataset.
Expand Down Expand Up @@ -187,6 +192,14 @@ def search(
disable_file_output (Union[bool, List]):
load_models (bool), (default=True): Whether to load the
models after fitting AutoPyTorch.
portfolio_selection (str), (default="none"): If "greedy",
runs initial configurations present in portfolio_path if it is
specified; otherwise, runs those in
'autoPyTorch/configs/greedy_portfolio.json'.
These configurations are the best performing configurations
when search was performed on meta training datasets.
For more info refer to `AutoPyTorch Tabular <https://arxiv.org/abs/2006.13799>`_
portfolio_path (Optional[str]):
Optional argument to specify path to a portfolio file.

Returns:
self
Expand Down Expand Up @@ -233,6 +246,8 @@ def search(
precision=precision,
disable_file_output=disable_file_output,
load_models=load_models,
portfolio_selection=portfolio_selection,
portfolio_path=portfolio_path
)

def predict(
Expand Down