automl · franchuterivera · May 31, 2021 · May 5, 2021 · May 6, 2021 · May 7, 2021
diff --git a/autoPyTorch/api/base_task.py b/autoPyTorch/api/base_task.py
@@ -121,6 +121,9 @@ class BaseTask:
         exclude_components (Optional[Dict]): If None, all possible components are used.
             Otherwise specifies set of components not to use. Incompatible with include
             components
+        search_space_updates (Optional[HyperparameterSearchSpaceUpdates]):
+            search space updates that can be used to modify the search
+            space of particular components or choice modules of the pipeline
     """
 
     def __init__(
@@ -702,6 +705,8 @@ def _search(
         precision: int = 32,
         disable_file_output: List = [],
         load_models: bool = True,
+        portfolio_selection: str = "none",
+        portfolio_path: Optional[str] = None
     ) -> 'BaseTask':
         """
         Search for the best pipeline configuration for the given dataset.
@@ -772,7 +777,14 @@ def _search(
             disable_file_output (Union[bool, List]):
             load_models (bool), (default=True): Whether to load the
                 models after fitting AutoPyTorch.
-
+            portfolio_selection (str), (default="none"): If "greedy",
+                runs initial configurations present in portfolio_path if it
+                is specified; otherwise, those in 'autoPyTorch/configs/greedy_portfolio.json'.
+                These configurations are the best performing configurations
+                when search was performed on meta training datasets.
+                For more info refer to `AutoPyTorch Tabular <https://arxiv.org/abs/2006.13799>`_
+            portfolio_path (Optional[str]):
+                Optional argument to specify path to a portfolio file.
         Returns:
             self
 
@@ -781,6 +793,9 @@ def _search(
             raise ValueError("Incompatible dataset entered for current task,"
                              "expected dataset to have task type :{} got "
                              ":{}".format(self.task_type, dataset.task_type))
+        if portfolio_selection not in ["none", "greedy"]:
+            raise ValueError("Expected portfolio_selection to be in ['none', 'greedy']"
+                             "got {}".format(portfolio_selection))
 
         # Initialise information needed for the experiment
         experiment_task_name: str = 'runSearch'
@@ -957,7 +972,9 @@ def _search(
                 # We do not increase the num_run here, this is something
                 # smac does internally
                 start_num_run=self._backend.get_next_num_run(peek=True),
-                search_space_updates=self.search_space_updates
+                search_space_updates=self.search_space_updates,
+                portfolio_selection=portfolio_selection,
+                portfolio_path=portfolio_path
             )
             try:
                 run_history, self.trajectory, budget_type = \

diff --git a/autoPyTorch/api/tabular_classification.py b/autoPyTorch/api/tabular_classification.py
@@ -57,6 +57,9 @@ class TabularClassificationTask(BaseTask):
             If None, all possible components are used. Otherwise
             specifies set of components not to use. Incompatible
             with include components
+        search_space_updates (Optional[HyperparameterSearchSpaceUpdates]):
+            search space updates that can be used to modify the search
+            space of particular components or choice modules of the pipeline
     """
     def __init__(
         self,
@@ -131,6 +134,8 @@ def search(
         precision: int = 32,
         disable_file_output: List = [],
         load_models: bool = True,
+        portfolio_selection: str = "none",
+        portfolio_path: Optional[str] = None
     ) -> 'BaseTask':
         """
         Search for the best pipeline configuration for the given dataset.
@@ -143,21 +148,21 @@ def search(
                 A pair of features (X_train) and targets (y_train) used to fit a
                 pipeline. Additionally, a holdout of this pairs (X_test, y_test) can
                 be provided to track the generalization performance of each stage.
-            optimize_metric (str): name of the metric that is used to
-                evaluate a pipeline.
+            optimize_metric (str):
+                name of the metric that is used to evaluate a pipeline.
             budget_type (Optional[str]):
                 Type of budget to be used when fitting the pipeline.
                 Either 'epochs' or 'runtime'. If not provided, uses
                 the default in the pipeline config ('epochs')
             budget (Optional[float]):
                 Budget to fit a single run of the pipeline. If not
                 provided, uses the default in the pipeline config
-            total_walltime_limit (int), (default=100): Time limit
-                in seconds for the search of appropriate models.
+            total_walltime_limit (int), (default=100):
+                Time limit in seconds for the search of appropriate models.
                 By increasing this value, autopytorch has a higher
                 chance of finding better models.
-            func_eval_time_limit_secs (int), (default=None): Time limit
-                for a single call to the machine learning model.
+            func_eval_time_limit_secs (int), (default=None):
+                Time limit for a single call to the machine learning model.
                 Model fitting will be terminated if the machine
                 learning algorithm runs over the time limit. Set
                 this value high enough so that typical machine
@@ -174,32 +179,40 @@ def search(
                 feature by turning this flag to False. All machine learning
                 algorithms that are fitted during search() are considered for
                 ensemble building.
-            memory_limit (Optional[int]), (default=4096): Memory
-                limit in MB for the machine learning algorithm. autopytorch
+            memory_limit (Optional[int]), (default=4096):
+                Memory limit in MB for the machine learning algorithm. autopytorch
                 will stop fitting the machine learning algorithm if it tries
                 to allocate more than memory_limit MB. If None is provided,
                 no memory limit is set. In case of multi-processing, memory_limit
                 will be per job. This memory limit also applies to the ensemble
                 creation process.
-            smac_scenario_args (Optional[Dict]): Additional arguments inserted
-                into the scenario of SMAC. See the
+            smac_scenario_args (Optional[Dict]):
+                Additional arguments inserted into the scenario of SMAC. See the
                 [SMAC documentation] (https://automl.github.io/SMAC3/master/options.html?highlight=scenario#scenario)
-            get_smac_object_callback (Optional[Callable]): Callback function
-                to create an object of class
+            get_smac_object_callback (Optional[Callable]):
+                Callback function to create an object of class
                 [smac.optimizer.smbo.SMBO](https://automl.github.io/SMAC3/master/apidoc/smac.optimizer.smbo.html).
                 The function must accept the arguments scenario_dict,
                 instances, num_params, runhistory, seed and ta. This is
                 an advanced feature. Use only if you are familiar with
                 [SMAC](https://automl.github.io/SMAC3/master/index.html).
-            all_supported_metrics (bool), (default=True): if True, all
-                metrics supporting current task will be calculated
+            all_supported_metrics (bool), (default=True):
+                if True, all metrics supporting current task will be calculated
                 for each pipeline and results will be available via cv_results
             precision (int), (default=32): Numeric precision used when loading
                 ensemble data. Can be either '16', '32' or '64'.
             disable_file_output (Union[bool, List]):
-            load_models (bool), (default=True): Whether to load the
-                models after fitting AutoPyTorch.
-
+            load_models (bool), (default=True):
+                Whether to load the models after fitting AutoPyTorch.
+            portfolio_selection (str), (default="none"):
+                If "greedy", runs initial configurations present in
+                portfolio_path if it is specified; otherwise, those in
+                'autoPyTorch/configs/greedy_portfolio.json'.
+                These configurations are the best performing configurations
+                when search was performed on meta training datasets.
+                For more info refer to `AutoPyTorch Tabular <https://arxiv.org/abs/2006.13799>`_
+            portfolio_path (Optional[str]):
+                Optional argument to specify path to a portfolio file.
         Returns:
             self
 
@@ -245,6 +258,8 @@ def search(
             precision=precision,
             disable_file_output=disable_file_output,
             load_models=load_models,
+            portfolio_selection=portfolio_selection,
+            portfolio_path=portfolio_path
         )
 
     def predict(

diff --git a/autoPyTorch/api/tabular_regression.py b/autoPyTorch/api/tabular_regression.py
@@ -48,6 +48,9 @@ class TabularRegressionTask(BaseTask):
         exclude_components (Optional[Dict]): If None, all possible components are used.
             Otherwise specifies set of components not to use. Incompatible with include
             components
+        search_space_updates (Optional[HyperparameterSearchSpaceUpdates]):
+            search space updates that can be used to modify the search
+            space of particular components or choice modules of the pipeline
     """
 
     def __init__(
@@ -123,6 +126,8 @@ def search(
         precision: int = 32,
         disable_file_output: List = [],
         load_models: bool = True,
+        portfolio_selection: str = "none",
+        portfolio_path: Optional[str] = None
     ) -> 'BaseTask':
         """
         Search for the best pipeline configuration for the given dataset.
@@ -187,6 +192,14 @@ def search(
             disable_file_output (Union[bool, List]):
             load_models (bool), (default=True): Whether to load the
                 models after fitting AutoPyTorch.
+            portfolio_selection (str), (default="none"): If "greedy",
+                runs initial configurations present in portfolio_path if it
+                is specified; otherwise, those in 'autoPyTorch/configs/greedy_portfolio.json'.
+                These configurations are the best performing configurations
+                when search was performed on meta training datasets.
+                For more info refer to `AutoPyTorch Tabular <https://arxiv.org/abs/2006.13799>`_
+            portfolio_path (Optional[str]):
+                Optional argument to specify path to a portfolio file.
 
         Returns:
             self
@@ -233,6 +246,8 @@ def search(
             precision=precision,
             disable_file_output=disable_file_output,
             load_models=load_models,
+            portfolio_selection=portfolio_selection,
+            portfolio_path=portfolio_path
         )
 
     def predict(