Address comments from shuhei, change run_greedy to portfolio_selection

automl · May 17, 2021 · 9929fb0 · 9929fb0
1 parent af8fda6
commit 9929fb0
Show file tree

Hide file tree

Showing 10 changed files with 87 additions and 27 deletions.
diff --git a/autoPyTorch/api/base_task.py b/autoPyTorch/api/base_task.py
@@ -121,6 +121,9 @@ class BaseTask:
         exclude_components (Optional[Dict]): If None, all possible components are used.
             Otherwise specifies set of components not to use. Incompatible with include
             components
+        search_space_updates (Optional[HyperparameterSearchSpaceUpdates]):
+            search space updates that can be used to modify the search
+            space of particular components or choice modules of the pipeline
     """
 
     def __init__(
@@ -702,7 +705,7 @@ def _search(
         precision: int = 32,
         disable_file_output: List = [],
         load_models: bool = True,
-        run_greedy_portfolio: bool = False
+        portfolio_selection: str = "none"
     ) -> 'BaseTask':
         """
         Search for the best pipeline configuration for the given dataset.
@@ -773,12 +776,12 @@ def _search(
             disable_file_output (Union[bool, List]):
             load_models (bool), (default=True): Whether to load the
                 models after fitting AutoPyTorch.
-            run_greedy_portfolio (bool), (default=False): If True,
+            portfolio_selection (str), (default="none"): If "greedy",
                 runs initial configurations present in
                 'autoPyTorch/optimizer/greedy_portfolio.json'.
                 These configurations are the best performing configurations
                 when search was performed on meta training datasets.
-                For more info refer to `AutoPyTorch Tabular <https://arxiv.org/abs/2006.13799>
+                For more info refer to `AutoPyTorch Tabular <https://arxiv.org/abs/2006.13799>`_
         Returns:
             self
 
@@ -964,7 +967,7 @@ def _search(
                 # smac does internally
                 start_num_run=self._backend.get_next_num_run(peek=True),
                 search_space_updates=self.search_space_updates,
-                run_greedy_portfolio=run_greedy_portfolio
+                portfolio_selection=portfolio_selection
             )
             try:
                 run_history, self.trajectory, budget_type = \

diff --git a/autoPyTorch/api/tabular_classification.py b/autoPyTorch/api/tabular_classification.py
@@ -57,6 +57,9 @@ class TabularClassificationTask(BaseTask):
             If None, all possible components are used. Otherwise
             specifies set of components not to use. Incompatible
             with include components
+        search_space_updates (Optional[HyperparameterSearchSpaceUpdates]):
+            search space updates that can be used to modify the search
+            space of particular components or choice modules of the pipeline
     """
     def __init__(
         self,
@@ -131,7 +134,7 @@ def search(
         precision: int = 32,
         disable_file_output: List = [],
         load_models: bool = True,
-        run_greedy_portfolio: bool = False
+        portfolio_selection: str = "none"
     ) -> 'BaseTask':
         """
         Search for the best pipeline configuration for the given dataset.
@@ -200,12 +203,12 @@ def search(
             disable_file_output (Union[bool, List]):
             load_models (bool), (default=True): Whether to load the
                 models after fitting AutoPyTorch.
-            run_greedy_portfolio (bool), (default=False): If True,
+            portfolio_selection (str), (default="none"): If "greedy",
                 runs initial configurations present in
                 'autoPyTorch/optimizer/greedy_portfolio.json'.
                 These configurations are the best performing configurations
                 when search was performed on meta training datasets.
-                For more info refer to `AutoPyTorch Tabular <https://arxiv.org/abs/2006.13799>
+                For more info refer to `AutoPyTorch Tabular <https://arxiv.org/abs/2006.13799>`_
 
         Returns:
             self
@@ -252,7 +255,7 @@ def search(
             precision=precision,
             disable_file_output=disable_file_output,
             load_models=load_models,
-            run_greedy_portfolio=run_greedy_portfolio
+            portfolio_selection=portfolio_selection
         )
 
     def predict(

diff --git a/autoPyTorch/api/tabular_regression.py b/autoPyTorch/api/tabular_regression.py
@@ -48,6 +48,9 @@ class TabularRegressionTask(BaseTask):
         exclude_components (Optional[Dict]): If None, all possible components are used.
             Otherwise specifies set of components not to use. Incompatible with include
             components
+        search_space_updates (Optional[HyperparameterSearchSpaceUpdates]):
+            search space updates that can be used to modify the search
+            space of particular components or choice modules of the pipeline
     """
 
     def __init__(
@@ -123,7 +126,7 @@ def search(
         precision: int = 32,
         disable_file_output: List = [],
         load_models: bool = True,
-        run_greedy_portfolio: bool = False
+        portfolio_selection: str = "none"
     ) -> 'BaseTask':
         """
         Search for the best pipeline configuration for the given dataset.
@@ -188,12 +191,12 @@ def search(
             disable_file_output (Union[bool, List]):
             load_models (bool), (default=True): Whether to load the
                 models after fitting AutoPyTorch.
-            run_greedy_portfolio (bool), (default=False): If True,
-                            runs initial configurations present in
-                            'autoPyTorch/optimizer/greedy_portfolio.json'.
-                            These configurations are the best performing configurations
-                            when search was performed on meta training datasets.
-                            For more info refer to `AutoPyTorch Tabular <https://arxiv.org/abs/2006.13799>
+            portfolio_selection (str), (default="none"): If "greedy",
+                runs initial configurations present in
+                'autoPyTorch/optimizer/greedy_portfolio.json'.
+                These configurations are the best performing configurations
+                when search was performed on meta training datasets.
+                For more info refer to `AutoPyTorch Tabular <https://arxiv.org/abs/2006.13799>`_
         Returns:
             self
 
@@ -239,7 +242,7 @@ def search(
             precision=precision,
             disable_file_output=disable_file_output,
             load_models=load_models,
-            run_greedy_portfolio=run_greedy_portfolio
+            portfolio_selection=portfolio_selection
         )
 
     def predict(

diff --git a/autoPyTorch/optimizer/smbo.py b/autoPyTorch/optimizer/smbo.py
@@ -109,7 +109,7 @@ def __init__(self,
                  ensemble_callback: typing.Optional[EnsembleBuilderManager] = None,
                  logger_port: typing.Optional[int] = None,
                  search_space_updates: typing.Optional[HyperparameterSearchSpaceUpdates] = None,
-                 run_greedy_portfolio: bool = False
+                 portfolio_selection: str = "none"
                  ):
         """
         Interface to SMAC. This method calls the SMAC optimize method, and allows
@@ -158,7 +158,7 @@ def __init__(self,
                 Allows to create a user specified SMAC object
             ensemble_callback (typing.Optional[EnsembleBuilderManager]):
                 A callback used in this scenario to start ensemble building subtasks
-            run_greedy_portfolio (bool), (default=False): If True,
+            portfolio_selection (str), (default="none"): If "greedy",
                 runs initial configurations present in
                 'autoPyTorch/optimizer/greedy_portfolio.json'.
         """
@@ -217,7 +217,8 @@ def __init__(self,
         initial_configurations = json.load(open(os.path.join(os.path.dirname(__file__), 'greedy_portfolio.json')))
 
         self.initial_configurations: typing.Optional[typing.List[Configuration]] = None
-        if run_greedy_portfolio:
+        assert portfolio_selection in ['none', 'greedy']
+        if portfolio_selection == "greedy":
             self.initial_configurations = list()
             for configuration_dict in initial_configurations:
                 try:

diff --git a/autoPyTorch/pipeline/base_pipeline.py b/autoPyTorch/pipeline/base_pipeline.py
@@ -41,6 +41,9 @@ class BasePipeline(Pipeline):
         random_state (np.random.RandomState): allows to produce reproducible results by
             setting a seed for randomized settings
         init_params (Optional[Dict[str, Any]])
+        search_space_updates (Optional[HyperparameterSearchSpaceUpdates]):
+            search space updates that can be used to modify the search
+            space of particular components or choice modules of the pipeline
 
 
     Attributes:

diff --git a/autoPyTorch/pipeline/image_classification.py b/autoPyTorch/pipeline/image_classification.py
@@ -40,7 +40,18 @@ class ImageClassificationPipeline(ClassifierMixin, BasePipeline):
     Args:
         config (Configuration)
             The configuration to evaluate.
-        random_state (Optional[RandomState): random_state is the random number generator
+        steps (Optional[List[Tuple[str, autoPyTorchChoice]]]): the list of steps that
+            build the pipeline. If provided, they won't be dynamically produced.
+        include (Optional[Dict[str, Any]]): Allows the caller to specify which configurations
+            to honor during the creation of the configuration space.
+        exclude (Optional[Dict[str, Any]]): Allows the caller to specify which configurations
+            to avoid during the creation of the configuration space.
+        random_state (np.random.RandomState): allows to produce reproducible results by
+            setting a seed for randomized settings
+        init_params (Optional[Dict[str, Any]])
+        search_space_updates (Optional[HyperparameterSearchSpaceUpdates]):
+            search space updates that can be used to modify the search
+            space of particular components or choice modules of the pipeline
 
     Attributes:
     Examples

diff --git a/autoPyTorch/pipeline/tabular_classification.py b/autoPyTorch/pipeline/tabular_classification.py
@@ -60,7 +60,18 @@ class TabularClassificationPipeline(ClassifierMixin, BasePipeline):
     Args:
         config (Configuration)
             The configuration to evaluate.
-        random_state (Optional[RandomState): random_state is the random number generator
+        steps (Optional[List[Tuple[str, autoPyTorchChoice]]]): the list of steps that
+            build the pipeline. If provided, they won't be dynamically produced.
+        include (Optional[Dict[str, Any]]): Allows the caller to specify which configurations
+            to honor during the creation of the configuration space.
+        exclude (Optional[Dict[str, Any]]): Allows the caller to specify which configurations
+            to avoid during the creation of the configuration space.
+        random_state (np.random.RandomState): allows to produce reproducible results by
+            setting a seed for randomized settings
+        init_params (Optional[Dict[str, Any]])
+        search_space_updates (Optional[HyperparameterSearchSpaceUpdates]):
+            search space updates that can be used to modify the search
+            space of particular components or choice modules of the pipeline
 
     Attributes:
     Examples

diff --git a/autoPyTorch/pipeline/tabular_regression.py b/autoPyTorch/pipeline/tabular_regression.py
@@ -58,7 +58,18 @@ class TabularRegressionPipeline(RegressorMixin, BasePipeline):
     Args:
         config (Configuration)
             The configuration to evaluate.
-        random_state (Optional[RandomState): random_state is the random number generator
+        steps (Optional[List[Tuple[str, autoPyTorchChoice]]]): the list of steps that
+            build the pipeline. If provided, they won't be dynamically produced.
+        include (Optional[Dict[str, Any]]): Allows the caller to specify which configurations
+            to honor during the creation of the configuration space.
+        exclude (Optional[Dict[str, Any]]): Allows the caller to specify which configurations
+            to avoid during the creation of the configuration space.
+        random_state (np.random.RandomState): allows to produce reproducible results by
+            setting a seed for randomized settings
+        init_params (Optional[Dict[str, Any]])
+        search_space_updates (Optional[HyperparameterSearchSpaceUpdates]):
+            search space updates that can be used to modify the search
+            space of particular components or choice modules of the pipeline
 
     Attributes:
     Examples

diff --git a/autoPyTorch/pipeline/traditional_tabular_classification.py b/autoPyTorch/pipeline/traditional_tabular_classification.py
@@ -10,6 +10,7 @@
 from autoPyTorch.pipeline.base_pipeline import BasePipeline
 from autoPyTorch.pipeline.components.base_choice import autoPyTorchChoice
 from autoPyTorch.pipeline.components.setup.traditional_ml.base_model_choice import ModelChoice
+from autoPyTorch.utils.hyperparameter_search_space_update import HyperparameterSearchSpaceUpdates
 
 
 class TraditionalTabularClassificationPipeline(ClassifierMixin, BasePipeline):
@@ -19,7 +20,19 @@ class TraditionalTabularClassificationPipeline(ClassifierMixin, BasePipeline):
     Args:
         config (Configuration)
             The configuration to evaluate.
-        random_state (Optional[RandomState): random_state is the random number generator
+        steps (Optional[List[Tuple[str, autoPyTorchChoice]]]): the list of steps that
+            build the pipeline. If provided, they won't be dynamically produced.
+        include (Optional[Dict[str, Any]]): Allows the caller to specify which configurations
+            to honor during the creation of the configuration space.
+        exclude (Optional[Dict[str, Any]]): Allows the caller to specify which configurations
+            to avoid during the creation of the configuration space.
+        random_state (np.random.RandomState): allows to produce reproducible results by
+            setting a seed for randomized settings
+        init_params (Optional[Dict[str, Any]])
+        search_space_updates (Optional[HyperparameterSearchSpaceUpdates]):
+            search space updates that can be used to modify the search
+            space of particular components or choice modules of the pipeline
+
 
     Attributes:
     """
@@ -32,11 +45,12 @@ def __init__(
         include: Optional[Dict[str, Any]] = None,
         exclude: Optional[Dict[str, Any]] = None,
         random_state: Optional[np.random.RandomState] = None,
-        init_params: Optional[Dict[str, Any]] = None
+        init_params: Optional[Dict[str, Any]] = None,
+        search_space_updates: Optional[HyperparameterSearchSpaceUpdates] = None
     ):
         super().__init__(
             config, steps, dataset_properties, include, exclude,
-            random_state, init_params)
+            random_state, init_params, search_space_updates)
 
     def predict(self, X: np.ndarray, batch_size: Optional[int] = None
                 ) -> np.ndarray:

diff --git a/...40_advanced/example_run_with_portfolio.py → ...40_advanced/example_run_with_portfolio.py b/...40_advanced/example_run_with_portfolio.py → ...40_advanced/example_run_with_portfolio.py
@@ -54,10 +54,10 @@
         optimize_metric='accuracy',
         total_walltime_limit=300,
         func_eval_time_limit_secs=50,
-        # Setting this option to True
+        # Setting this option to "greedy"
         # will make smac run the configurations
         # present in 'autoPyTorch/optimizer/greedy_portfolio.json'
-        run_greedy_portfolio=True
+        portfolio_selection="greedy"
     )
 
     ############################################################################