Skip to content

Commit

Permalink
Address comments from shuhei, change run_greedy to portfolio_selection
Browse files Browse the repository at this point in the history
  • Loading branch information
ravinkohli committed May 17, 2021
1 parent af8fda6 commit 9929fb0
Show file tree
Hide file tree
Showing 10 changed files with 87 additions and 27 deletions.
11 changes: 7 additions & 4 deletions autoPyTorch/api/base_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,9 @@ class BaseTask:
exclude_components (Optional[Dict]): If None, all possible components are used.
Otherwise specifies set of components not to use. Incompatible with include
components
search_space_updates (Optional[HyperparameterSearchSpaceUpdates]):
search space updates that can be used to modify the search
space of particular components or choice modules of the pipeline
"""

def __init__(
Expand Down Expand Up @@ -702,7 +705,7 @@ def _search(
precision: int = 32,
disable_file_output: List = [],
load_models: bool = True,
run_greedy_portfolio: bool = False
portfolio_selection: str = "none"
) -> 'BaseTask':
"""
Search for the best pipeline configuration for the given dataset.
Expand Down Expand Up @@ -773,12 +776,12 @@ def _search(
disable_file_output (Union[bool, List]):
load_models (bool), (default=True): Whether to load the
models after fitting AutoPyTorch.
run_greedy_portfolio (bool), (default=False): If True,
portfolio_selection (str), (default="none"): If "greedy",
runs initial configurations present in
'autoPyTorch/optimizer/greedy_portfolio.json'.
These configurations are the best performing configurations
when search was performed on meta training datasets.
For more info refer to `AutoPyTorch Tabular <https://arxiv.org/abs/2006.13799>
For more info refer to `AutoPyTorch Tabular <https://arxiv.org/abs/2006.13799>`_
Returns:
self
Expand Down Expand Up @@ -964,7 +967,7 @@ def _search(
# smac does internally
start_num_run=self._backend.get_next_num_run(peek=True),
search_space_updates=self.search_space_updates,
run_greedy_portfolio=run_greedy_portfolio
portfolio_selection=portfolio_selection
)
try:
run_history, self.trajectory, budget_type = \
Expand Down
11 changes: 7 additions & 4 deletions autoPyTorch/api/tabular_classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,9 @@ class TabularClassificationTask(BaseTask):
If None, all possible components are used. Otherwise
specifies set of components not to use. Incompatible
with include components
search_space_updates (Optional[HyperparameterSearchSpaceUpdates]):
search space updates that can be used to modify the search
space of particular components or choice modules of the pipeline
"""
def __init__(
self,
Expand Down Expand Up @@ -131,7 +134,7 @@ def search(
precision: int = 32,
disable_file_output: List = [],
load_models: bool = True,
run_greedy_portfolio: bool = False
portfolio_selection: str = "none"
) -> 'BaseTask':
"""
Search for the best pipeline configuration for the given dataset.
Expand Down Expand Up @@ -200,12 +203,12 @@ def search(
disable_file_output (Union[bool, List]):
load_models (bool), (default=True): Whether to load the
models after fitting AutoPyTorch.
run_greedy_portfolio (bool), (default=False): If True,
portfolio_selection (str), (default="none"): If "greedy",
runs initial configurations present in
'autoPyTorch/optimizer/greedy_portfolio.json'.
These configurations are the best performing configurations
when search was performed on meta training datasets.
For more info refer to `AutoPyTorch Tabular <https://arxiv.org/abs/2006.13799>
For more info refer to `AutoPyTorch Tabular <https://arxiv.org/abs/2006.13799>`_
Returns:
self
Expand Down Expand Up @@ -252,7 +255,7 @@ def search(
precision=precision,
disable_file_output=disable_file_output,
load_models=load_models,
run_greedy_portfolio=run_greedy_portfolio
portfolio_selection=portfolio_selection
)

def predict(
Expand Down
19 changes: 11 additions & 8 deletions autoPyTorch/api/tabular_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,9 @@ class TabularRegressionTask(BaseTask):
exclude_components (Optional[Dict]): If None, all possible components are used.
Otherwise specifies set of components not to use. Incompatible with include
components
search_space_updates (Optional[HyperparameterSearchSpaceUpdates]):
search space updates that can be used to modify the search
space of particular components or choice modules of the pipeline
"""

def __init__(
Expand Down Expand Up @@ -123,7 +126,7 @@ def search(
precision: int = 32,
disable_file_output: List = [],
load_models: bool = True,
run_greedy_portfolio: bool = False
portfolio_selection: str = "none"
) -> 'BaseTask':
"""
Search for the best pipeline configuration for the given dataset.
Expand Down Expand Up @@ -188,12 +191,12 @@ def search(
disable_file_output (Union[bool, List]):
load_models (bool), (default=True): Whether to load the
models after fitting AutoPyTorch.
run_greedy_portfolio (bool), (default=False): If True,
runs initial configurations present in
'autoPyTorch/optimizer/greedy_portfolio.json'.
These configurations are the best performing configurations
when search was performed on meta training datasets.
For more info refer to `AutoPyTorch Tabular <https://arxiv.org/abs/2006.13799>
portfolio_selection (str), (default="none"): If "greedy",
runs initial configurations present in
'autoPyTorch/optimizer/greedy_portfolio.json'.
These configurations are the best performing configurations
when search was performed on meta training datasets.
For more info refer to `AutoPyTorch Tabular <https://arxiv.org/abs/2006.13799>`_
Returns:
self
Expand Down Expand Up @@ -239,7 +242,7 @@ def search(
precision=precision,
disable_file_output=disable_file_output,
load_models=load_models,
run_greedy_portfolio=run_greedy_portfolio
portfolio_selection=portfolio_selection
)

def predict(
Expand Down
7 changes: 4 additions & 3 deletions autoPyTorch/optimizer/smbo.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ def __init__(self,
ensemble_callback: typing.Optional[EnsembleBuilderManager] = None,
logger_port: typing.Optional[int] = None,
search_space_updates: typing.Optional[HyperparameterSearchSpaceUpdates] = None,
run_greedy_portfolio: bool = False
portfolio_selection: str = "none"
):
"""
Interface to SMAC. This method calls the SMAC optimize method, and allows
Expand Down Expand Up @@ -158,7 +158,7 @@ def __init__(self,
Allows creating a user-specified SMAC object
ensemble_callback (typing.Optional[EnsembleBuilderManager]):
A callback used in this scenario to start ensemble building subtasks
run_greedy_portfolio (bool), (default=False): If True,
portfolio_selection (str), (default="none"): If "greedy",
runs initial configurations present in
'autoPyTorch/optimizer/greedy_portfolio.json'.
"""
Expand Down Expand Up @@ -217,7 +217,8 @@ def __init__(self,
initial_configurations = json.load(open(os.path.join(os.path.dirname(__file__), 'greedy_portfolio.json')))

self.initial_configurations: typing.Optional[typing.List[Configuration]] = None
if run_greedy_portfolio:
assert portfolio_selection in ['none', 'greedy']
if portfolio_selection == "greedy":
self.initial_configurations = list()
for configuration_dict in initial_configurations:
try:
Expand Down
3 changes: 3 additions & 0 deletions autoPyTorch/pipeline/base_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,9 @@ class BasePipeline(Pipeline):
random_state (np.random.RandomState): allows producing reproducible results by
setting a seed for randomized settings
init_params (Optional[Dict[str, Any]])
search_space_updates (Optional[HyperparameterSearchSpaceUpdates]):
search space updates that can be used to modify the search
space of particular components or choice modules of the pipeline
Attributes:
Expand Down
13 changes: 12 additions & 1 deletion autoPyTorch/pipeline/image_classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,18 @@ class ImageClassificationPipeline(ClassifierMixin, BasePipeline):
Args:
config (Configuration)
The configuration to evaluate.
random_state (Optional[RandomState): random_state is the random number generator
steps (Optional[List[Tuple[str, autoPyTorchChoice]]]): the list of steps that
build the pipeline. If provided, they won't be dynamically produced.
include (Optional[Dict[str, Any]]): Allows the caller to specify which configurations
to honor during the creation of the configuration space.
exclude (Optional[Dict[str, Any]]): Allows the caller to specify which configurations
to avoid during the creation of the configuration space.
random_state (np.random.RandomState): allows producing reproducible results by
setting a seed for randomized settings
init_params (Optional[Dict[str, Any]])
search_space_updates (Optional[HyperparameterSearchSpaceUpdates]):
search space updates that can be used to modify the search
space of particular components or choice modules of the pipeline
Attributes:
Examples
Expand Down
13 changes: 12 additions & 1 deletion autoPyTorch/pipeline/tabular_classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,18 @@ class TabularClassificationPipeline(ClassifierMixin, BasePipeline):
Args:
config (Configuration)
The configuration to evaluate.
random_state (Optional[RandomState): random_state is the random number generator
steps (Optional[List[Tuple[str, autoPyTorchChoice]]]): the list of steps that
build the pipeline. If provided, they won't be dynamically produced.
include (Optional[Dict[str, Any]]): Allows the caller to specify which configurations
to honor during the creation of the configuration space.
exclude (Optional[Dict[str, Any]]): Allows the caller to specify which configurations
to avoid during the creation of the configuration space.
random_state (np.random.RandomState): allows producing reproducible results by
setting a seed for randomized settings
init_params (Optional[Dict[str, Any]])
search_space_updates (Optional[HyperparameterSearchSpaceUpdates]):
search space updates that can be used to modify the search
space of particular components or choice modules of the pipeline
Attributes:
Examples
Expand Down
13 changes: 12 additions & 1 deletion autoPyTorch/pipeline/tabular_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,18 @@ class TabularRegressionPipeline(RegressorMixin, BasePipeline):
Args:
config (Configuration)
The configuration to evaluate.
random_state (Optional[RandomState): random_state is the random number generator
steps (Optional[List[Tuple[str, autoPyTorchChoice]]]): the list of steps that
build the pipeline. If provided, they won't be dynamically produced.
include (Optional[Dict[str, Any]]): Allows the caller to specify which configurations
to honor during the creation of the configuration space.
exclude (Optional[Dict[str, Any]]): Allows the caller to specify which configurations
to avoid during the creation of the configuration space.
random_state (np.random.RandomState): allows producing reproducible results by
setting a seed for randomized settings
init_params (Optional[Dict[str, Any]])
search_space_updates (Optional[HyperparameterSearchSpaceUpdates]):
search space updates that can be used to modify the search
space of particular components or choice modules of the pipeline
Attributes:
Examples
Expand Down
20 changes: 17 additions & 3 deletions autoPyTorch/pipeline/traditional_tabular_classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from autoPyTorch.pipeline.base_pipeline import BasePipeline
from autoPyTorch.pipeline.components.base_choice import autoPyTorchChoice
from autoPyTorch.pipeline.components.setup.traditional_ml.base_model_choice import ModelChoice
from autoPyTorch.utils.hyperparameter_search_space_update import HyperparameterSearchSpaceUpdates


class TraditionalTabularClassificationPipeline(ClassifierMixin, BasePipeline):
Expand All @@ -19,7 +20,19 @@ class TraditionalTabularClassificationPipeline(ClassifierMixin, BasePipeline):
Args:
config (Configuration)
The configuration to evaluate.
random_state (Optional[RandomState): random_state is the random number generator
steps (Optional[List[Tuple[str, autoPyTorchChoice]]]): the list of steps that
build the pipeline. If provided, they won't be dynamically produced.
include (Optional[Dict[str, Any]]): Allows the caller to specify which configurations
to honor during the creation of the configuration space.
exclude (Optional[Dict[str, Any]]): Allows the caller to specify which configurations
to avoid during the creation of the configuration space.
random_state (np.random.RandomState): allows producing reproducible results by
setting a seed for randomized settings
init_params (Optional[Dict[str, Any]])
search_space_updates (Optional[HyperparameterSearchSpaceUpdates]):
search space updates that can be used to modify the search
space of particular components or choice modules of the pipeline
Attributes:
"""
Expand All @@ -32,11 +45,12 @@ def __init__(
include: Optional[Dict[str, Any]] = None,
exclude: Optional[Dict[str, Any]] = None,
random_state: Optional[np.random.RandomState] = None,
init_params: Optional[Dict[str, Any]] = None
init_params: Optional[Dict[str, Any]] = None,
search_space_updates: Optional[HyperparameterSearchSpaceUpdates] = None
):
super().__init__(
config, steps, dataset_properties, include, exclude,
random_state, init_params)
random_state, init_params, search_space_updates)

def predict(self, X: np.ndarray, batch_size: Optional[int] = None
) -> np.ndarray:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,10 +54,10 @@
optimize_metric='accuracy',
total_walltime_limit=300,
func_eval_time_limit_secs=50,
# Setting this option to True
# Setting this option to "greedy"
# will make smac run the configurations
# present in 'autoPyTorch/optimizer/greedy_portfolio.json'
run_greedy_portfolio=True
portfolio_selection="greedy"
)

############################################################################
Expand Down

0 comments on commit 9929fb0

Please sign in to comment.