diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index bab4759dd..602cafad2 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -16,7 +16,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ['3.10', '3.11', '3.12'] + python-version: ['3.10', '3.11', '3.12', '3.13'] os: [ubuntu-latest, macos-latest, windows-latest] defaults: run: @@ -35,4 +35,4 @@ jobs: cache-dependency-glob: "**/pyproject.toml" - name: run tests run: uv run --all-extras pytest -m "" # Run all markers - + diff --git a/.gitignore b/.gitignore index 651960faf..3ca497eeb 100644 --- a/.gitignore +++ b/.gitignore @@ -32,6 +32,7 @@ results neps_examples/results tests_tmpdir usage_example +lightning_logs # Regression tests !losses.json diff --git a/docs/doc_images/examples/val_loss_image_segmentation.jpg b/docs/doc_images/examples/val_loss_image_segmentation.jpg new file mode 100644 index 000000000..5a37ef6d3 Binary files /dev/null and b/docs/doc_images/examples/val_loss_image_segmentation.jpg differ diff --git a/docs/doc_images/optimizers/venn_dia.svg b/docs/doc_images/optimizers/venn_dia.svg deleted file mode 100644 index 40f680614..000000000 --- a/docs/doc_images/optimizers/venn_dia.svg +++ /dev/null @@ -1,26 +0,0 @@ -
[deleted SVG content: Venn diagram with labels Successive Halving, HyperBand, ASHA, Mobster, IfBO, PiBO, PriorBand, PB+BO, SH+Prior, ASHA+Prior, Multi-Fidelity, Priors]
\ No newline at end of file diff --git a/docs/getting_started.md b/docs/getting_started.md index 447c2b1af..643abf9b6 100644 --- a/docs/getting_started.md +++ b/docs/getting_started.md @@ -51,23 +51,22 @@ neps.run(evaluate_pipeline, pipeline_space) The [reference](reference/neps_run.md) section provides detailed information on the individual components of NePS. -1. How to use the **[`neps.run()` function](reference/neps_run.md)** to start the optimization process. +1. How to use the [**`neps.run()`** function](reference/neps_run.md) to start the optimization process. 2. The different [search space](reference/pipeline_space.md) options available. 3. How to choose and configure the [optimizer](reference/optimizers.md) used. -4. [Declarative usage](reference/declarative_usage.md) of NePS via YAML configuration files. -5. How to define the [`evaluate_pipeline()` function](reference/evaluate_pipeline.md). -6. How to use the [CLI](reference/cli.md) to run NePS from the command line. -7. How to [analyze](reference/analyse.md) the optimization runs. +4. How to define the [`evaluate_pipeline()` function](reference/evaluate_pipeline.md). +5. How to use the [CLI](reference/cli.md) to run NePS from the command line. +6. How to [analyze](reference/analyse.md) the optimization runs. Or discover the features of NePS through these practical examples: -* **[Hyperparameter Optimization (HPO)](examples/template/basic.md)**: +* **[Hyperparameter Optimization (HPO)](examples/basic_usage/hyperparameters.md)**: Learn the essentials of hyperparameter optimization with NePS. * **[Multi-Fidelity Optimization](examples/efficiency/multi_fidelity.md)**: Understand how to leverage multi-fidelity optimization for efficient model tuning. -* **[Utilizing Expert Priors for Hyperparameters](examples/template/priorband.md)**: +* **[Utilizing Expert Priors for Hyperparameters](examples/efficiency/expert_priors_for_hyperparameters.md)**: Learn how to incorporate expert priors for more efficient hyperparameter selection. * **[Additional NePS Examples](examples/index.md)**: diff --git a/docs/index.md b/docs/index.md index c46516924..d6e618e19 100644 --- a/docs/index.md +++ b/docs/index.md @@ -46,10 +46,10 @@ pip install neural-pipeline-search Using `neps` always follows the same pattern: -1. Define a `evalute_pipeline` function capable of evaluating different architectural and/or hyperparameter configurations +1. Define a `evaluate_pipeline` function capable of evaluating different architectural and/or hyperparameter configurations for your problem. 1. Define a search space named `pipeline_space` of those Parameters e.g. via a dictionary -1. Call `neps.run` to optimize `evalute_pipeline` over `pipeline_space` +1. Call `neps.run` to optimize `evaluate_pipeline` over `pipeline_space` In code, the usage pattern can look like this: @@ -59,7 +59,7 @@ import logging # 1. Define a function that accepts hyperparameters and computes the validation error -def evalute_pipeline( +def evaluate_pipeline( hyperparameter_a: float, hyperparameter_b: int, architecture_parameter: str ) -> dict: # Create your model @@ -72,7 +72,7 @@ def evalute_pipeline( return validation_error -# 2. Define a search space of parameters; use the same parameter names as in evalute_pipeline +# 2. Define a search space of parameters; use the same parameter names as in evaluate_pipeline pipeline_space = dict( hyperparameter_a=neps.Float( lower=0.001, upper=0.1, log=True # The search space is sampled in log space @@ -84,7 +84,7 @@ pipeline_space = dict( # 3. 
Run the NePS optimization logging.basicConfig(level=logging.INFO) neps.run( - evalute_pipeline=evalute_pipeline, + evaluate_pipeline=evaluate_pipeline, pipeline_space=pipeline_space, root_directory="path/to/save/results", # Replace with the actual path. max_evaluations_total=100, diff --git a/docs/reference/analyse.md b/docs/reference/analyse.md index 0ccab5b03..3fd8f694b 100644 --- a/docs/reference/analyse.md +++ b/docs/reference/analyse.md @@ -51,13 +51,12 @@ NePS will also generate a summary CSV file for you. │ └── config_1 │ ├── config.yaml │ ├── metadata.yaml - │ └── result.yaml - ├── summary_csv - │ ├── config_data.csv - │ └── run_status.csv - ├── all_losses_and_configs.txt - ├── best_loss_trajectory.txt - └── best_loss_with_config_trajectory.txt + │ └── report.yaml + ├── summary + │ ├── full.csv + │ └── short.csv + ├── optimizer_info.yaml + └── optimizer_state.pkl ``` @@ -69,17 +68,17 @@ NePS will also generate a summary CSV file for you. │ └── config_1 │ ├── config.yaml │ ├── metadata.yaml - │ └── result.yaml - ├── all_losses_and_configs.txt - ├── best_loss_trajectory.txt - └── best_loss_with_config_trajectory.txt + │ └── report.yaml + ├── optimizer_info.yaml + └── optimizer_state.pkl ``` -The `config_data.csv` contains all configuration details in CSV format, ordered by ascending `loss`. -Details include configuration hyperparameters, any returned result from the `evalute_pipeline` function, and metadata information. +The `full.csv` contains all configuration details in CSV format. +Details include configuration hyperparameters and any returned result and cost from the `evaluate_pipeline` function. -The `run_status.csv` provides general run details, such as the number of sampled configs, best configs, number of failed configs, best loss, etc. +The `run_status.csv` provides general run details, such as the number of failed and successful configurations, +and the best configuration with its corresponding objective value. # TensorBoard Integration diff --git a/docs/reference/cli.md b/docs/reference/cli.md index a3febcdc6..46952155e 100644 --- a/docs/reference/cli.md +++ b/docs/reference/cli.md @@ -35,14 +35,14 @@ Executes the optimization based on the provided configuration. This command serv - `--evaluate-pipeline` (Optional): Optional: Provide the path to a Python file and a function name separated by a colon, e.g., 'path/to/module.py:function_name'. If provided, it overrides the evaluate_pipeline setting from the YAML configuration. - `--pipeline-space` (Optional): Path to the YAML file defining the search space for the optimization. This can be provided here or defined within the 'run_args' YAML file. - `--root-directory` (Optional): The directory to save progress to. This is also used to synchronize multiple calls for parallelization. -- `--overwrite-working-directory` (Optional): If set, deletes the working directory at the start of the run. This is useful, for example, when debugging a evalute_pipeline function. +- `--overwrite-working-directory` (Optional): If set, deletes the working directory at the start of the run. This is useful, for example, when debugging a evaluate_pipeline function. - `--post-run-summary` (Optional): Provide a summary of the results after running. - `--no-post-run-summary` (Optional): Do not provide a summary of the results after running. - `--max-evaluations-total` (Optional): Total number of evaluations to run. - `--max-evaluations-per-run` (Optional): Number of evaluations a specific call should maximally do. 
- `--continue-until-max-evaluation-completed` (Optional): If set, only stop after max-evaluations-total have been completed. This is only relevant in the parallel setting. - `--max-cost-total` (Optional): No new evaluations will start when this cost is exceeded. Requires returning a cost - in the evalute_pipeline function. + in the evaluate_pipeline function. - `--ignore-errors` (Optional): If set, ignore errors during the optimization process. - `--loss-value-on-error` (Optional): Loss value to assume on error. - `--cost-value-on-error` (Optional): Cost value to assume on error. diff --git a/docs/reference/declarative_usage.md b/docs/reference/declarative_usage.md deleted file mode 100644 index 57b53c476..000000000 --- a/docs/reference/declarative_usage.md +++ /dev/null @@ -1,148 +0,0 @@ -## Introduction -If you prefer to use yaml for experiment configuration, -[`neps.run()`][neps.api.run] supports yaml serialized input. - -We make no assumption on how you'd like to structure you experimentation -and you are free to run it as you wish! - -Please check [`neps.run()`][neps.api.run] for complete information on the arguments. - -#### Simple YAML Example -Below is a YAML configuration example for NePS covering the required arguments. -The arguments match those that you can pass to [`neps.run()`][neps.api.run]. - -In general, you can encode any [`Parameter`][neps.space.Parameter] into a YAML format. - -=== "config.yaml" - - ```yaml - # Basic NePS Configuration Example - pipeline_space: - - batch_size: 64 # Constant - - optimizer: [adam, sgd, adamw] # Categorical - - alpha: [0.01, 1.0] # Uniform Float - - n_layers: [1, 10] # Uniform Integer - - learning_rate: # Log scale Float with a prior - lower: 1e-5 - upper: 1e-1 - log: true - prior: 1e-3 - prior_confidence: high - - epochs: # Integer fidelity - lower: 5 - upper: 20 - is_fidelity: true - - root_directory: path/to/results # Directory for result storage - max_evaluations_total: 20 # Budget - - optimizer: - name: hyperband # Which optimizer to use - ``` - -=== "run_neps.py" - - ```python - import neps - import yaml - - def evaluate_pipeline(learning_rate, optimizer, epochs, batch_size): - model = initialize_model() - training_loss = train_model(model, optimizer, learning_rate, epochs) - evaluation_loss = evaluate_model(model) - return {"objective_to_minimize": evaluation_loss, "training_loss": training_loss} - - if __name__ == "__main__": - with open("path/config.yaml") as f: - settings = yaml.safe_load(f) - - neps.run(evaluate_pipeline, **settings) - ``` - -!!! tip "Merging multiple yaml files" - - If you would like to seperate parts of your configuration into multiple yamls, - for example, to seperate out your search spaces and optimizers, - you can use the `neps.load_yamls` function to merge them, checking for conflicts. - - ```python - import neps - - def evaluate_pipeline(...): - ... - - if __name__ == "__main__": - settings = neps.load_yamls("path/to/your/config.yaml", "path/to/your/optimizer.yaml") - neps.run(evaluate_pipeline, **settings) - ``` - - -#### Comprehensive YAML Configuration Template -This example showcases a more comprehensive YAML configuration, which includes not only the essential parameters -but also advanced settings for more complex setups. 
- -=== "config.yaml" - - ```yaml - # Full Configuration Template for NePS - evaluate_pipeline: path/to/your/evaluate_pipeline.py::example_pipeline - - pipeline_space: - learning_rate: - lower: 1e-5 - upper: 1e-1 - log: true - epochs: - lower: 5 - upper: 20 - is_fidelity: true - optimizer: - choices: [adam, sgd, adamw] - batch_size: 64 - - root_directory: path/to/results # Directory for result storage - max_evaluations_total: 20 # Budget - max_cost_total: - - # Debug and Monitoring - overwrite_working_directory: true - post_run_summary: false - - # Parallelization Setup - max_evaluations_per_run: - continue_until_max_evaluation_completed: false - - # Error Handling - objective_value_on_error: - cost_value_on_error: - ignore_errors: - - optimizer: - name: hyperband - ``` - -=== "run_neps.py" - - ```python - - if __name__ == "__main__": - import neps - - with open("path/config.yaml") as f: - settings = yaml.safe_load(f) - - # Note, we specified our run function in the yaml itself! - neps.run(**settings) - ``` - -## CLI Usage - -!!! warning "CLI Usage" - - The CLI is still in development and may not be fully functional. diff --git a/docs/reference/evaluate_pipeline.md b/docs/reference/evaluate_pipeline.md index b4779cffa..f23b6d663 100644 --- a/docs/reference/evaluate_pipeline.md +++ b/docs/reference/evaluate_pipeline.md @@ -1,4 +1,4 @@ -# The run function +# The evaluate function ## Introduction @@ -108,7 +108,7 @@ Each evaluation carries a cost of 2. Hence in this example, the Bayesian optimiz NePS also provides the `pipeline_directory` and the `previous_pipeline_directory` as arguments in the `evaluate_pipeline=` function for user convenience. -Regard an example to be run with a multi-fidelity optimizer, some checkpointing would be advantageos such that one does not have to train the configuration from scratch when the configuration qualifies to higher fidelity brackets. +Regard an example to be run with a multi-fidelity optimizer, some checkpointing would be advantageous such that one does not have to train the configuration from scratch when the configuration qualifies to higher fidelity brackets. 
```python def evaluate_pipeline( @@ -116,31 +116,31 @@ def evaluate_pipeline( previous_pipeline_directory, # The directory of the immediate lower fidelity config **config, # The hyperparameters to be used in the pipeline ): - # Assume element3 is our fidelity element + # Assume the third element is our fidelity element element_1 = config["element_1"] element_2 = config["element_2"] - element_3 = config["element_3"] + fidelity = config["fidelity"] # Load any saved checkpoints checkpoint_name = "checkpoint.pth" - start_element_3 = 0 + start_fidelity = 0 if previous_pipeline_directory is not None: # Read in state of the model after the previous fidelity rung checkpoint = torch.load(previous_pipeline_directory / checkpoint_name) - prev_element_3 = checkpoint["element_3"] + prev_fidelity = checkpoint["fidelity"] else: - prev_element_3 = 0 + prev_fidelity = 0 - start_element_3 += prev_element_3 + start_fidelity += prev_fidelity loss = 0 - for i in range(start_element_3, element_3): + for i in range(start_fidelity, fidelity): loss += element_1 - element_2 torch.save( { - "element_3": element_3, + "fidelity": fidelity, }, pipeline_directory / checkpoint_name, ) diff --git a/docs/reference/neps_run.md b/docs/reference/neps_run.md index 2885770ed..5e023253f 100644 --- a/docs/reference/neps_run.md +++ b/docs/reference/neps_run.md @@ -15,13 +15,13 @@ To operate, NePS requires at minimum the following two arguments ```python import neps -def run(learning_rate: float, epochs: int) -> float: +def evaluate_pipeline(learning_rate: float, epochs: int) -> float: # Your code here return loss neps.run( - evaluate_pipeline=run, # (1)! + evaluate_pipeline=evaluate_pipeline, # (1)! pipeline_space={, # (2)! "learning_rate": neps.Float(1e-3, 1e-1, log=True), "epochs": neps.Integer(10, 100) @@ -47,11 +47,11 @@ See the following for more: ## Budget, how long to run? To define a budget, provide `max_evaluations_total=` to [`neps.run()`][neps.api.run], to specify the total number of evaluations to conduct before halting the optimization process, -or `max_cost_total=` to specify a cost threshold for your own custom cost metric, such as time, energy, or monetary. +or `max_cost_total=` to specify a cost threshold for your own custom cost metric, such as time, energy, or monetary, as returned by each evaluation of the pipeline . ```python -def run(learning_rate: float, epochs: int) -> float: +def evaluate_pipeline(learning_rate: float, epochs: int) -> float: start = time.time() # Your code here @@ -68,7 +68,7 @@ neps.run( 1. Specifies the total number of evaluations to conduct before halting the optimization process. 2. Prevents the initiation of new evaluations once this cost threshold is surpassed. This can be any kind of cost metric you like, such as time, energy, or monetary, as long as you can calculate it. - This requires adding a cost value to the output of the `evaluate_pipeline` function, for example, return `#!python {'loss': loss, 'cost': cost}`. + This requires adding a cost value to the output of the `evaluate_pipeline` function, for example, return `#!python {'objective_to_minimize': loss, 'cost': cost}`. For more details, please refer [here](../reference/evaluate_pipeline.md) ## Getting some feedback, logging @@ -138,14 +138,14 @@ while will generate a summary csv after the run has finished. 
├── configs │ ├── config_1 │ │ ├── config.yaml # The configuration - │ │ ├── report.yaml # The results of this run, if any + │ │ ├── report.yaml # The results of this run, if any │ │ └── metadata.json # Metadata about this run, such as state and times │ └── config_2 │ ├── config.yaml │ └── metadata.json - ├── summary_csv # Only if post_run_summary=True - │ ├── config_data.csv - │ └── run_status.csv + ├── summary # Only if post_run_summary=True + │ ├── full.csv + │ └── short.csv ├── optimizer_info.yaml # The optimizer's configuration └── optimizer_state.pkl # The optimizer's state, shared between workers ``` @@ -160,6 +160,7 @@ To capture the results of the optimization process, you can use tensorbaord logg closer to NePS. For more information, please refer to the [analyses page](../reference/analyse.md) page. ## Parallelization + NePS utilizes the file-system and locks as a means of communication for implementing parallelization and resuming runs. As a result, you can start multiple [`neps.run()`][neps.api.run] from different processes however you like and they will synchronize, **as long as they share the same `root_directory=`**. @@ -198,55 +199,8 @@ Any new workers that come online will automatically pick up work and work togeth python worker.py & ``` -## YAML Configuration -We support arguments to [`neps.run()`][neps.api.run] that have been seriliazed into a -YAML file. This means you can manage your configurations in a more human-readable format -if you prefer. - -For more on yaml usage, please visit the dedicated -[page on usage of YAML with NePS](../reference/declarative_usage.md). - - -=== "`config.yaml`" - - ```yaml - # We allow specifying the evaluate_pipeline as a module path and function name - evaluate_pipeline: path/to/evaluate_pipeline.py:eval_func_name - - pipeline_space: - batch_size: 64 # Constant - optimizer: [adam, sgd, adamw] # Categorical - alpha: [0.01, 1.0] # Uniform Float - n_layers: [1, 10] # Uniform Integer - learning_rate: # Log scale Float with a prior - lower: 1e-5 - upper: 1e-1 - log: true - prior: 1e-3 - prior_confidence: high - epochs: # Integer fidelity - lower: 5 - upper: 20 - is_fidelity: true - - root_directory: "neps_results" # Output directory for results - max_evaluations_total: 100 - optimizer: - name: "bayesian_optimization" - initial_design_size: 5 - cost_aware: true - ``` - -=== "`run_neps.py`" - - ```python - with open("config.yaml", "r") as file: - settings = yaml.safe_load(file) - - neps.run(**settings) - ``` - ## Handling Errors + Things go wrong during optimization runs and it's important to consider what to do in these cases. By default, NePS will halt the optimization process when an error but you can choose to `ignore_errors=`, providing a `loss_value_on_error=` and `cost_value_on_error=` to control what values should be @@ -276,19 +230,20 @@ neps.run( Any runs that error will still count towards the total `max_evaluations_total` or `max_evaluations_per_run`. ### Re-running Failed Configurations + Sometimes things go wrong but not due to the configuration itself. Sometimes you'd also like to change the state so that you re-evaluate that configuration. If you need to go in there and change anything, **the entire optimization state** is editable on disk. You can follow these steps to modify the state of things. 
-``` +``` python root_directory ├── configs │ ├── .trial_cache.pkl # A cache of all trial information for optimizers │ ├── config_1 │ │ ├── config.yaml # The configuration -│ │ ├── report.yaml # The results of this run, if any +│ │ ├── report.yaml # The results of this run, if any │ │ ├── metadata.json # Metadata about this run, such as state and times │ └── config_2 │ ├── config.yaml @@ -298,11 +253,11 @@ root_directory ``` 1. The first thing you should do is make sure no workers are running. -2. Next, delete `optimizer_state.pkl` and `configs/.trial_cache.pkl`. This is cached information to share betwen the +2. Next, delete `optimizer_state.pkl` and `configs/.trial_cache.pkl`. This is cached information to share between the workers. 3. Lastly, you can go in and modify any of the following files: - * `config.yaml` - The configuration to be run. This was samled from your search space. + * `config.yaml` - The configuration to be run. This was sampled from your search space. * `report.yaml` - The results of the run. This is where you can change what was reported back. * `metadata.json` - Metadata about the run. Here you can change the `"state"` key to one of [`State`][neps.state.trial.State] to re-run the configuration, usually you'd want to set it diff --git a/docs/reference/optimizers.md b/docs/reference/optimizers.md index 91de58f05..3c6502624 100644 --- a/docs/reference/optimizers.md +++ b/docs/reference/optimizers.md @@ -40,17 +40,20 @@ For a more detailed explanation of Priors and a list of NePS-optimizers using Pr NePS provides a multitude of optimizers from the literature, the [algorithms](../reference/search_algorithms/landing_page_algo.md) section goes into detail on each of them. This chapter focusses on how to select them when using NePS. 
-| Algorithm | [Multi-Fidelity](../reference/search_algorithms/multifidelity.md) | [Priors](../reference/search_algorithms/prior.md) | Model-based | Asynchronous | -| :- | :------------: | :----: | :---------: | :-: | -| `Grid Search`||||✅| -| `Random Search`||||✅| -| [`Successive Halving`](../reference/search_algorithms/multifidelity.md#1-successive-halfing)|✅|||| -| [`ASHA`](../reference/search_algorithms/multifidelity.md#asynchronous-successive-halving)|✅|||✅| -| [`Hyperband`](../reference/search_algorithms/multifidelity.md#2-hyperband)|✅|||| -| [`Asynch HB`](../reference/search_algorithms/multifidelity.md)|✅|||✅| -| [`IfBO`](../reference/search_algorithms/multifidelity.md#5-in-context-freeze-thaw-bayesian-optimization)|✅||✅|| -| [`PiBO`](../reference/search_algorithms/prior.md#1-pibo)||✅|✅|| -| [`PriorBand`](../reference/search_algorithms/multifidelity_prior.md#1-priorband)|✅|✅|✅|| +✅ = supported/necessary, ❌ = not supported, ✔️* = optional, click for details, ✖️\* ignorable, click for details + +| Algorithm | [Multi-Fidelity](../reference/search_algorithms/multifidelity.md) | [Priors](../reference/search_algorithms/prior.md) | Model-based | +| :- | :------------: | :----: | :---------: | +| `Grid Search`|[️️✖️*][neps.optimizers.algorithms.grid_search]|❌|❌| +| `Random Search`|[️️✖️*][neps.optimizers.algorithms.random_search]|[✔️*][neps.optimizers.algorithms.random_search]|❌| +| [`Bayesian Optimization`](../reference/search_algorithms/bayesian_optimization.md)|[️️✖️*][neps.optimizers.algorithms.bayesian_optimization]|❌|✅| +| [`Successive Halving`](../reference/search_algorithms/multifidelity.md#1-successive-halfing)|✅|[✔️*][neps.optimizers.algorithms.successive_halving]|❌| +| [`ASHA`](../reference/search_algorithms/multifidelity.md#asynchronous-successive-halving)|✅|[✔️*][neps.optimizers.algorithms.asha]|❌| +| [`Hyperband`](../reference/search_algorithms/multifidelity.md#2-hyperband)|✅|[✔️*][neps.optimizers.algorithms.hyperband]|❌| +| [`Asynch HB`](../reference/search_algorithms/multifidelity.md)|✅|[✔️*][neps.optimizers.algorithms.async_hb]|❌| +| [`IfBO`](../reference/search_algorithms/multifidelity.md#3-in-context-freeze-thaw-bayesian-optimization)|✅|[✔️*][neps.optimizers.algorithms.ifbo]|✅| +| [`PiBO`](../reference/search_algorithms/prior.md#1-pibo)|[️️✖️*][neps.optimizers.algorithms.pibo]|✅|✅| +| [`PriorBand`](../reference/search_algorithms/multifidelity_prior.md#1-priorband)|✅|✅|✅| If you prefer not to specify a particular optimizer for your AutoML task, you can simply pass `"auto"` or `None` for the neps optimizer. 
This provides a hassle-free way to get started quickly, as NePS will automatically choose the best optimizer based on the characteristics of your search @@ -81,7 +84,7 @@ You can use either the optimizer name or the optimizer class itself as the optim ```python neps.run( - evalute_pipeline=run_function, + evaluate_pipeline=run_function, pipeline_space=pipeline_space, root_directory="results/", max_evaluations_total=25, @@ -98,7 +101,7 @@ For users who want more control over the optimizer's hyperparameters, you can in ```python neps.run( - evalute_pipeline=run_function, + evaluate_pipeline=run_function, pipeline_space=pipeline_space, root_directory="results/", max_evaluations_total=25, @@ -131,7 +134,7 @@ The class is then passed to the `neps.run()` function just like the built-in opt ```python neps.run( - evalute_pipeline=run_function, + evaluate_pipeline=run_function, pipeline_space=pipeline_space, root_directory="results/", max_evaluations_total=25, diff --git a/docs/reference/pipeline_space.md b/docs/reference/pipeline_space.md index ee0955ed4..9844e42a3 100644 --- a/docs/reference/pipeline_space.md +++ b/docs/reference/pipeline_space.md @@ -1,9 +1,9 @@ # Initializing the Pipeline Space In NePS, we need to define a `pipeline_space`. -This space can be structured through various approaches, including a Python dictionary, a YAML file, or ConfigSpace. +This space can be structured through various approaches, including a Python dictionary, or ConfigSpace. Each of these methods allows you to specify a set of parameter types, ranging from Float and Categorical to specialized architecture parameters. -Whether you choose a dictionary, YAML file, or ConfigSpace, your selected method serves as a container or framework +Whether you choose a dictionary, or ConfigSpace, your selected method serves as a container or framework within which these parameters are defined and organized. This section not only guides you through the process of setting up your `pipeline_space` using these methods but also provides detailed instructions and examples on how to effectively incorporate various parameter types, ensuring that NePS can utilize them in the optimization process. @@ -61,9 +61,6 @@ By indicating a `prior=` we take this to be your user prior, You can also specify a `prior_confidence=` to indicate how strongly you want NePS, to focus on these, one of either `"low"`, `"medium"`, or `"high"`. -Currently the two major algorithms that exploit this in NePS are `PriorBand` -(prior-based `HyperBand`) and `PiBO`, a version of Bayesian Optimization which uses Priors. - ```python import neps @@ -77,57 +74,13 @@ neps.run( } ) ``` -!!! warning "Must set `prior=` for all parameters, if any" - - If you specify `prior=` for one parameter, you must do so for all your variables. - This will be improved in future versions. !!! warning "Interaction with `is_fidelity`" - If you specify `is_fidelity=True` for one parameter, the `prior=` and `prior_confidence=` are ignored. - This will be dissallowed in future versions. - -## Defining a pipeline space using YAML -Create a YAML file (e.g., `./pipeline_space.yaml`) with the parameter definitions following this structure. 
- -=== "`./pipeline_space.yaml`" - - ```yaml - learning_rate: - type: float - lower: 2e-3 - upper: 0.1 - log: true - - num_epochs: - type: int - lower: 3 - upper: 30 - is_fidelity: true - - optimizer: - type: categorical - choices: ["adam", "sgd", "rmsprop"] - - dropout_rate: 0.5 - ``` - -=== "`run.py`" - - ```python - neps.run(.., pipeline_space="./pipeline_space.yaml") - ``` - -When defining the `pipeline_space` using a YAML file, if the `type` argument is not specified, -the NePS will automatically infer the data type based on the value provided. - -* If `lower` and `upper` are provided, then if they are both integers, the type will be inferred as `int`, - otherwise as `float`. You can provide scientific notation for floating-point numbers as well. -* If `choices` are provided, the type will be inferred as `categorical`. -* If just a numeric or string is provided, the type will be inferred as `constant`. - -If none of these hold, an error will be raised. + If you specify `is_fidelity=True` and `prior=` for one parameter, this will raise an error. +Currently the two major algorithms that exploit this in NePS are `PriorBand` +(prior-based `HyperBand`) and `PiBO`, a version of Bayesian Optimization which uses Priors. For more information on priors and algorithms using them, please refer to the [prior documentation](../reference/search_algorithms/prior.md). ## Using ConfigSpace @@ -153,18 +106,3 @@ configspace = ConfigurationSpace( For additional information on ConfigSpace and its features, please visit the following [link](https://github.com/automl/ConfigSpace). - -## Supported Architecture parameter Types -A comprehensive documentation for the Architecture parameter is not available at this point. - -If you are interested in exploring architecture parameters, you can find detailed -examples and usage in the following resources: - -- [Basic Usage Examples](https://github.com/automl/neps/tree/master/neps_examples/basic_usage) - Basic usage - examples that can help you understand the fundamentals of Architecture parameters. -- [Experimental Examples](https://github.com/automl/neps/tree/master/neps_examples/experimental) - For more advanced - and experimental use cases, including Hierarchical parameters, check out this collection of examples. - -!!! warning - - The configuration of `pipeline_space` from a YAML file does not currently support architecture parameter types. 
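Purely as an illustration of the prior-related arguments discussed in the search-space page edited above (not part of this patch), here is a minimal sketch of a prior-annotated space. The keyword names `prior=`, `prior_confidence=`, and `is_fidelity=` are taken from that page; the fidelity parameter deliberately carries no prior, since the page states that combining the two now raises an error.

```python
import neps

# Illustrative sketch only: a search space with expert priors on the searchable
# parameters and a separate fidelity parameter (which must not carry a prior).
pipeline_space = {
    "learning_rate": neps.Float(
        lower=1e-5,
        upper=1e-1,
        log=True,
        prior=1e-3,               # the expert's best guess
        prior_confidence="high",  # how strongly NePS should focus on it
    ),
    "num_layers": neps.Integer(1, 10),
    "epochs": neps.Integer(5, 20, is_fidelity=True),  # fidelity: no prior allowed here
}
```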
diff --git a/docs/reference/search_algorithms/landing_page_algo.md b/docs/reference/search_algorithms/landing_page_algo.md index f8aa402f7..7f7be891e 100644 --- a/docs/reference/search_algorithms/landing_page_algo.md +++ b/docs/reference/search_algorithms/landing_page_algo.md @@ -4,17 +4,20 @@ Algorithms are the search strategies determining what configurations to evaluate We distinguish between algorithms that use different types of information and strategies to guide the search process: -| Algorithm | [Multi-Fidelity](landing_page_algo.md#what-is-multi-fidelity-optimization) | [Priors](landing_page_algo.md#what-are-priors) | [Model-based](bayesian_optimization.md) | Asynchronous (Parallelizable) | -| :- | :------------: | :----: | :---------: | :-: | -| `Grid Search`||||✅| -| `Random Search`||||✅| -| [`Successive Halving`](./multifidelity.md#1-successive-halfing)|✅|||| -| [`ASHA`](./multifidelity.md#asynchronous-successive-halving)|✅|||✅| -| [`Hyperband`](./multifidelity.md#2-hyperband)|✅|||| -| [`Asynch HB`](./multifidelity.md)|✅|||✅| -| [`IfBO`](./multifidelity.md#5-in-context-freeze-thaw-bayesian-optimization)|✅||✅|| -| [`PiBO`](./prior.md#1-pibo)||✅|✅|| -| [`PriorBand`](./multifidelity_prior.md#1-priorband)|✅|✅|✅|| +✅ = supported/necessary, ❌ = not supported, ✔️* = optional, click for details, ✖️\* ignorable, click for details + +| Algorithm | [Multi-Fidelity](../search_algorithms/multifidelity.md) | [Priors](../search_algorithms/prior.md) | Model-based | +| :- | :------------: | :----: | :---------: | +| `Grid Search`|[️️✖️*][neps.optimizers.algorithms.grid_search]|❌|❌| +| `Random Search`|[️️✖️*][neps.optimizers.algorithms.random_search]|[✔️*][neps.optimizers.algorithms.random_search]|❌| +| [`Bayesian Optimization`](../search_algorithms/bayesian_optimization.md)|[️️✖️*][neps.optimizers.algorithms.bayesian_optimization]|❌|✅| +| [`Successive Halving`](../search_algorithms/multifidelity.md#1-successive-halfing)|✅|[✔️*][neps.optimizers.algorithms.successive_halving]|❌| +| [`ASHA`](../search_algorithms/multifidelity.md#asynchronous-successive-halving)|✅|[✔️*][neps.optimizers.algorithms.asha]|❌| +| [`Hyperband`](../search_algorithms/multifidelity.md#2-hyperband)|✅|[✔️*][neps.optimizers.algorithms.hyperband]|❌| +| [`Asynch HB`](../search_algorithms/multifidelity.md)|✅|[✔️*][neps.optimizers.algorithms.async_hb]|❌| +| [`IfBO`](../search_algorithms/multifidelity.md#3-in-context-freeze-thaw-bayesian-optimization)|✅|[✔️*][neps.optimizers.algorithms.ifbo]|✅| +| [`PiBO`](../search_algorithms/prior.md#1-pibo)|[️️✖️*][neps.optimizers.algorithms.pibo]|✅|✅| +| [`PriorBand`](../search_algorithms/multifidelity_prior.md#1-priorband)|✅|✅|✅| ## What is Multi-Fidelity Optimization? @@ -45,4 +48,4 @@ Priors are used when there exists some information about the search space, that - **Less exploration**: By focusing on these regions, the optimizer _might_ miss out on other regions that could potentially be better. - **Bad priors**: If the Prior is not a good representation of the search space, the optimizer might deliver suboptimal results, compared to a search without Priors. The optimizers we provide in NePS are specifically designed to handle bad priors, but they still slow down the search process. -We present a collection of algorithms that use Priors [here](./prior.md) and algorithms that combine priors wiht Multi-Fidelity [here](./multifidelity_prior.md). 
+We present a collection of algorithms that use Priors [here](./prior.md) and algorithms that combine priors with Multi-Fidelity [here](./multifidelity_prior.md). diff --git a/docs/reference/search_algorithms/multifidelity.md b/docs/reference/search_algorithms/multifidelity.md index 51c53cc16..171b1cc41 100644 --- a/docs/reference/search_algorithms/multifidelity.md +++ b/docs/reference/search_algorithms/multifidelity.md @@ -9,12 +9,14 @@ This section concerns optimizers that utilize Multi-Fidelity information to guid It starts with a large number of random configurations and evaluates them on a low-fidelity. The best-performing $1/\eta$ configurations are then promoted to the next fidelity, where they are evaluated again. This process is repeated until only a few configurations remain, evaluated on the highest fidelity. The process allows for broad exploration in the beginning and focus on the most promising configurations towards the end. +See the algorithm's implementation details in the [api][neps.optimizers.algorithms.successive_halving]. + ??? example "Practical Tips" - For the same total compute, `SH` outperforms uninformed search algorithms like random search or grid search. - It highly depends on the correlation between lower and higher fidelities. If the correlation is low, `SH` underperforms. - `SH` has two parameters: $\eta$ and $n$, where $\eta$ is the promotion factor and $n$ is the number of configurations at the lowest fidelity. - This results in a total of $\frac{n*r}{\eta^r}$ steps (from one fidelity level to the next), where $r$ is the number of fidelity levels. + This results in a total of $\frac{n*r}{\eta^r}$ steps (from one fidelity level to the next), where $r$ is the number of fidelity levels. For more details, see the [api][neps.optimizers.algorithms.successive_halving]. ### _Asynchronous_ Successive Halving @@ -26,21 +28,26 @@ Instead of waiting for all $n$ configurations to finish on one fidelity, `ASHA` Although not inherently a Prior-optimizer, ``SH`` (and ``ASHA``) can make use of [Priors](../search_algorithms/prior.md). Instead of sampling configurations uniformly, the optimizer can directly sample from the Prior, which results in a more focused search - highly beneficial _if_ the Prior is reliable. Alternatively, the ``SH`` can bias the promotion of configurations towards the Prior, keeping worse-performing, but recommended configurations longer in the optimization process. +See the algorithm's implementation details in the [api][neps.optimizers.algorithms.asha]. + ## 2 `HyperBand` `HyperBand`/`HB` (see [paper](https://arxiv.org/pdf/1603.06560)) is an extension of [``Successive Halfing``](../search_algorithms/multifidelity.md#1-successive-halfing) that employs multiple ``Successive Halfing``-rounds in parallel. Each of these runs has a different resource budget and different number of configurations. This makes ``HyperBand`` more flexible and parallelizable than ``SH``. +See the algorithm's implementation details in the [api][neps.optimizers.algorithms.hyperband]. + ??? example "Practical Tips" - ``HyperBand`` is a good choice when you have a limited budget and want to parallelize your search. - It is more efficient than ``SH`` when the correlation between lower and higher fidelities is low. - - ``Hyperband`` has two parameters: $\eta$ (typically 3 or 4) and $R$, where $\eta$ is the promotion factor and $R$ is the maximum budget any single configuration will be trained on. 
A larger $R$ will result in better, but slower results, while a larger $\eta$ will result in faster, but more noisy, potentially worse results. HB then spawns $\lfloor \log_\eta(R)\rfloor$ ``Successive Halfing``-rounds. + - ``Hyperband`` has two parameters: $\eta$ (typically 3 or 4) and $R$, where $\eta$ is the promotion factor and $R$ is the maximum budget any single configuration will be trained on. A larger $R$ will result in better, but slower results, while a larger $\eta$ will result in faster, but more noisy, potentially worse results. HB then spawns $\lfloor \log_\eta(R)\rfloor$ ``Successive Halfing``-rounds. For more details, see the [api][neps.optimizers.algorithms.hyperband]. !!! info ``HyperBand`` is chosen as the [default optimizer](../../reference/optimizers.md#21-automatic-optimizer-selection) in NePS when there is no [Prior](../search_algorithms/prior.md), only Multi-Fidelity information available. + + +## 3 `In-Context Freeze-Thaw Bayesian Optimization` `In-Context Freeze-Thaw Bayesian Optimization`/``IfBO`` (see [paper](https://arxiv.org/pdf/2204.11051)) expands on the idea of [Freeze-Thaw Bayesian Optimization](https://arxiv.org/pdf/1406.3896) (``FT-BO``) by using a `Prior-data fitted network` (PFN) as a surrogate for the ``FT-BO``. -Standard ``FT-BO`` models the performance of a configuration with a Gaussian Process, assuming exponential loss decay. Similar to [A-BOHB](../search_algorithms/multifidelity.md#4-a-bohb), it uses this joint GP to fantasize results and decides for the most informative configurations. The ``Entropy Search``-acquisition function (see [paper](https://jmlr.csail.mit.edu/papers/volume13/hennig12a/hennig12a.pdf)) quantifies this information gain: +Standard ``FT-BO`` models the performance of a configuration with a Gaussian Process, assuming exponential loss decay. It uses this joint GP to fantasize results and decides for the most informative configurations. The ``Entropy Search``-acquisition function (see [paper](https://jmlr.csail.mit.edu/papers/volume13/hennig12a/hennig12a.pdf)) quantifies this information gain: $$ a(\boldsymbol{x}) = \int\left(H\left(P^y_{\min}\right)\right) - \left(H\left(P_{\min}\right)\right)P(y| { \lbrace (\boldsymbol{x}_n,y_n) \rbrace }^N)dy $$ @@ -95,9 +104,11 @@ Lastly, ``IfBO`` adapts the `FT-BO` idea of _freezing_ (pausing training on) con |:--:| |The image shows the Freeze-Thaw-mechanism, with the colors indicating, at what iteration a configuration has been evaluated at this fidelity. Note for example some yellow configurations being reused much later, ending in red. (Image Source: [FT-BO-paper](https://arxiv.org/pdf/1406.3896), Jan 27, 2025)| +See the algorithm's implementation details in the [api][neps.optimizers.algorithms.ifbo]. + ??? example "Practical Tips" - TODO Do we even use it? + - ``IfBO`` is a good choice when the problem allows for low-fidelity configurations to be continued to retrieve high-fidelity results, utilizing neps's [checkpointing](../evaluate_pipeline.md#arguments-for-convenience) feature. ___ For optimizers using both Priors and Multi-Fidelity, please refer [here](multifidelity_prior.md). 
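As a bridge between the algorithm descriptions above and the optimizer-selection page edited earlier in this diff, the following is a hedged sketch of explicitly requesting one of these multi-fidelity optimizers by its string name. The `optimizer=` argument and the parameter classes are assumptions drawn from the surrounding documentation rather than from this patch.

```python
import neps

def evaluate_pipeline(learning_rate: float, epochs: int) -> float:
    # Train for `epochs` epochs and return the validation loss (stand-in value here).
    validation_loss = learning_rate * 0.0
    return validation_loss

neps.run(
    evaluate_pipeline=evaluate_pipeline,
    pipeline_space={
        "learning_rate": neps.Float(1e-5, 1e-1, log=True),
        "epochs": neps.Integer(5, 20, is_fidelity=True),  # the fidelity Hyperband ladders over
    },
    root_directory="results/hyperband_sketch",
    max_evaluations_total=20,
    optimizer="hyperband",  # assumed: string names match neps.optimizers.algorithms
)
```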
diff --git a/docs/reference/search_algorithms/multifidelity_prior.md b/docs/reference/search_algorithms/multifidelity_prior.md index d60f9720c..cf8075a48 100644 --- a/docs/reference/search_algorithms/multifidelity_prior.md +++ b/docs/reference/search_algorithms/multifidelity_prior.md @@ -7,7 +7,7 @@ For a detailed explanation of Multi-Fidelity and Priors, please refer [here](lan ### 1 `PriorBand` -`PriorBand` is an extension of [`HyperBand`](../../reference/search_algorithms/multifidelity.md#2-hyperband) that utilizes expert Priors to choose the next configuration. +`PriorBand` (see [paper](https://openreview.net/pdf?id=uoiwugtpCH)) is an extension of [`HyperBand`](../../reference/search_algorithms/multifidelity.md#2-hyperband) that utilizes expert Priors to choose the next configuration. ``PriorBand``'s sampling module $\mathcal{E}_\pi$ balances the influence of the Prior, the incumbent configurations and randomness to select configurations. @@ -15,11 +15,17 @@ For a detailed explanation of Multi-Fidelity and Priors, please refer [here](lan |:--:| |The ``PriorBand`` sampling module balances the influence of the Prior, the $1/\eta$ incumbent configurations and randomness to select configurations. (Image Source: [PriorBand-paper](https://openreview.net/pdf?id=uoiwugtpCH), Jan 27, 2025)| -The Prior sampling $p_\pi$ is most meaningful at full fidelity and when not much data is available yet, while the incumbent sampling $p_{\hat{\lambda}}$, coming from actual data, is most significant but sparse, and random sampling $p_{\mathcal{U}}$ is needed for exploration, especially at lower fidelities. This results in these inital sampling probabilities when there is no incument yet: +The Prior sampling $p_\pi$ is most meaningful at full fidelity and when not much data is available yet, while the incumbent sampling $p_{\hat{\lambda}}$, coming from actual data, is most significant but sparse, and random sampling $p_{\mathcal{U}}$ is needed for exploration, especially at lower fidelities. This results in these inital sampling probabilities when there is no incumbent yet: + +$$ +p_{\mathcal{U}}=1/(1+\eta^r) +$$ + +$$ +p_\pi=1-p_{\mathcal{U}} +$$ $$ -p_{\mathcal{U}}=1/(1+\eta^r)\\ -p_\pi=1-p_{\mathcal{U}}\\ p_{\hat{\lambda}}=0 $$ @@ -28,13 +34,21 @@ where $\eta$ is the promotion-hyperparameter from [`HyperBand`](../../reference/ When there is an incumbent, the probabilities are adjusted to: $$ -p_{\mathcal{U}}=1/(1+\eta^r)\\ -p_\pi=p_\pi\cdot\mathcal{S}_{\hat{\lambda}}/(\mathcal{S}_\pi+\mathcal{S}_{\hat{\lambda}})\\ +p_{\mathcal{U}}=1/(1+\eta^r) +$$ + +$$ +p_\pi=p_\pi\cdot\mathcal{S}_{\hat{\lambda}}/(\mathcal{S}_\pi+\mathcal{S}_{\hat{\lambda}}) +$$ + +$$ p_{\hat{\lambda}}=p_{\hat{\lambda}}\cdot\mathcal{S}_{\pi}/(\mathcal{S}_\pi+\mathcal{S}_{\hat{\lambda}}) $$ where $\mathcal{S}_\pi$ and $\mathcal{S}_{\hat{\lambda}}$ are the summed probabilities of the top $1/\eta$ configurations under Prior and incumbent sampling, respectively. This way, the balance is shifted towards the distribution that would have yielded the best configurations so far. Crucially, this compensates for potentially bad Priors, as the incumbent sampling will take over when it has proven to be better. +See the algorithm's implementation details in the [api][neps.optimizers.algorithms.priorband]. + ??? example "Practical Tips" - ``PriorBand`` is a good choice when you have a Prior but are wary of its quality and you can utilize Multi-Fidelity. 
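To make the `PriorBand` sampling probabilities above concrete, a small worked example (numbers purely illustrative): plugging $\eta = 3$ and $r = 1$ into the expressions for the case without an incumbent gives

$$
p_{\mathcal{U}} = \frac{1}{1+3^1} = 0.25, \qquad p_\pi = 1 - 0.25 = 0.75, \qquad p_{\hat{\lambda}} = 0,
$$

so three out of four configurations are drawn from the prior and one in four uniformly at random, until incumbent data begins to shift this balance.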
diff --git a/docs/reference/search_algorithms/prior.md b/docs/reference/search_algorithms/prior.md index 68a5d6d6b..65916c254 100644 --- a/docs/reference/search_algorithms/prior.md +++ b/docs/reference/search_algorithms/prior.md @@ -19,9 +19,11 @@ The following illustration from the `PiBO`-paper shows the influence of a well-c In both cases, the optimization process uses the additional information provided by the Prior to arrive at the solution, however, the bad Prior (right) results in a slower convergence to the optimum. +See the algorithm's implementation details in the [api][neps.optimizers.algorithms.pibo]. + ??? example "Practical Tips" - TODO Write about what to consider when using `PiBO` in NePS. + TODO !!! info ``PiBO`` is chosen as the [default optimizer](../../reference/optimizers.md#21-automatic-optimizer-selection) in NePS when there is only Prior, but no [Multi-Fidelity](../search_algorithms/multifidelity.md) information available. diff --git a/docs/reference/seeding.md b/docs/reference/seeding.md new file mode 100644 index 000000000..ffb229423 --- /dev/null +++ b/docs/reference/seeding.md @@ -0,0 +1,5 @@ +# Seeding + +Seeding is only rudimentarily supported in NePS, as we provide a function to capture the global rng state of `Python`, `numpy` and `torch`. It is not yet possible to seed only NePS internally. + +See the [Seeding API][neps.state.seed_snapshot.SeedSnapshot] for the details on how to [capture][neps.state.seed_snapshot.SeedSnapshot.new_capture] and [use][neps.state.seed_snapshot.SeedSnapshot.set_as_global_seed_state] this global rng state. diff --git a/mkdocs.yml b/mkdocs.yml index 37a3e979a..af68b33ac 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -94,7 +94,7 @@ plugins: python: paths: [neps] # Extra objects which allow for linking to external docs - import: + inventories: - 'https://docs.python.org/3/objects.inv' - 'https://numpy.org/doc/stable/objects.inv' - 'https://pandas.pydata.org/docs/objects.inv' @@ -141,10 +141,10 @@ nav: - Run: 'reference/neps_run.md' - Search Space: 'reference/pipeline_space.md' - Optimizer: 'reference/optimizers.md' - - Declarative Usage: 'reference/declarative_usage.md' - The Evaluate Function: 'reference/evaluate_pipeline.md' - CLI Usage: 'reference/cli.md' - Analysing Runs: 'reference/analyse.md' + - Seeding: 'reference/seeding.md' - Examples: "examples/" # auto-generated - Algorithms: - Algorithms: 'reference/search_algorithms/landing_page_algo.md' diff --git a/neps/optimizers/algorithms.py b/neps/optimizers/algorithms.py index 6de8f67be..0794a69d3 100644 --- a/neps/optimizers/algorithms.py +++ b/neps/optimizers/algorithms.py @@ -7,6 +7,7 @@ the parameters available. You can pass these functoins to `neps.run()` if you like, otherwise you may also refer to them by their string name. """ + # NOTE: If updating this file with new optimizers, please be aware that # the documentation here is what is shown in the `neps.run()` documentation. 
# Heres a checklist: @@ -16,6 +17,7 @@ from __future__ import annotations +import logging from collections.abc import Callable, Mapping, Sequence from dataclasses import dataclass, field from functools import partial @@ -42,6 +44,8 @@ from neps.optimizers.utils.brackets import Bracket from neps.space import SearchSpace +logger = logging.getLogger(__name__) + def _bo( pipeline_space: SearchSpace, @@ -50,6 +54,7 @@ def _bo( use_priors: bool, cost_aware: bool | Literal["log"], sample_prior_first: bool, + ignore_fidelity: bool = False, device: torch.device | str | None, ) -> BayesianOptimization: """Initialise the BO loop. @@ -70,19 +75,25 @@ def _bo( If using `cost`, cost must be provided in the reports of the trials. sample_prior_first: Whether to sample the default configuration first. + ignore_fidelity: Whether to ignore fidelity when sampling. + In this case, the max fidelity is always used. device: Device to use for the optimization. Raises: ValueError: if initial_design_size < 1 + ValueError: if fidelity is not None and ignore_fidelity is False """ - if any(pipeline_space.fidelities): + if not ignore_fidelity and pipeline_space.fidelity is not None: raise ValueError( - "Fidelities are not supported for BayesianOptimization." - " Please consider setting the fidelity to a constant value." - f" Got: {pipeline_space.fidelities}" + "Fidelities are not supported for BayesianOptimization. Consider setting the" + " fidelity to a constant value or ignoring it using ignore_fidelity to" + f" always sample at max fidelity. Got fidelity: {pipeline_space.fidelities} " ) - parameters = pipeline_space.searchables + if ignore_fidelity: + parameters = {**pipeline_space.searchables, **pipeline_space.fidelities} + else: + parameters = {**pipeline_space.searchables} match initial_design_size: case "ndim": @@ -181,11 +192,16 @@ def _bracket_optimizer( # noqa: C901, PLR0912, PLR0915 sample_prior_first: Whether to sample the prior configuration first. device: If using Bayesian Optimization, the device to use for the optimization. """ - assert pipeline_space.fidelity is not None - fidelity_name, fidelity = pipeline_space.fidelity + if pipeline_space.fidelity is not None: + fidelity_name, fidelity = pipeline_space.fidelity + else: + raise ValueError( + "Fidelity is required for bracket optimizers like" + f" {bracket_type if sampler != 'priorband' else 'priorband'}." + ) parameters = pipeline_space.searchables - if len(pipeline_space.fidelities) != 1: + if len(pipeline_space.fidelities) > 1: raise ValueError( "Only one fidelity should be defined in the pipeline space." f"\nGot: {pipeline_space.fidelities}" @@ -196,6 +212,14 @@ def _bracket_optimizer( # noqa: C901, PLR0912, PLR0915 "sample_prior_first should be either True, False or 'highest_fidelity'" ) + if ( + sample_prior_first in (True, "highest_fidelity") or sampler == "prior" + ) and not any(parameter.prior is not None for parameter in parameters.values()): + raise ValueError( + "No priors given to sample from. Consider setting sample_prior_first=False" + " and sampler='uniform'." + ) + from neps.optimizers.utils import brackets # Determine the strategy for creating brackets for sampling @@ -346,7 +370,7 @@ def random_search( pipeline_space: SearchSpace, *, use_priors: bool = False, - ignore_fidelity: bool = True, + ignore_fidelity: bool | Literal["highest fidelity"] = False, ) -> RandomSearch: """A simple random search algorithm that samples configurations uniformly at random. 
@@ -359,10 +383,48 @@ def random_search( ignore_fidelity: Whether to ignore fidelity when sampling. In this case, the max fidelity is always used. """ - if ignore_fidelity: - parameters = pipeline_space.searchables - else: - parameters = {**pipeline_space.searchables, **pipeline_space.fidelities} + assert ignore_fidelity in ( + True, + False, + "highest fidelity", + ), "ignore_fidelity should be either True, False or 'highest fidelity'" + if not ignore_fidelity and pipeline_space.fidelity is not None: + raise ValueError( + "Fidelities are not supported for RandomSearch. Consider setting the" + " fidelity to a constant value, or setting ignore_fidelity to True to sample" + " from it like any other parameter or 'highest fidelity' to always sample at" + f" max fidelity. Got fidelity: {pipeline_space.fidelities} " + ) + if ignore_fidelity in (True, "highest fidelity") and pipeline_space.fidelity is None: + logger.warning( + "Warning: You are using ignore_fidelity, but no fidelity is defined in the" + " search space. Consider setting ignore_fidelity to False." + ) + match ignore_fidelity: + case True: + parameters = {**pipeline_space.searchables, **pipeline_space.fidelities} + case False: + parameters = {**pipeline_space.searchables} + case "highest fidelity": + parameters = {**pipeline_space.searchables} + + if use_priors and not any( + parameter.prior is not None for parameter in parameters.values() + ): + logger.warning( + "Warning: You are using priors, but no priors are defined in the search" + " space. Consider setting use_priors to False." + ) + + if not use_priors and any( + parameter.prior is not None for parameter in parameters.values() + ): + priors = [ + parameter for parameter in parameters.values() if parameter.prior is not None + ] + raise ValueError( + f"To use priors, you must set use_priors=True. Got priors: {priors}" + ) return RandomSearch( space=pipeline_space, @@ -375,16 +437,33 @@ def random_search( ) -def grid_search(pipeline_space: SearchSpace) -> GridSearch: +def grid_search( + pipeline_space: SearchSpace, + ignore_fidelity: bool = False, # noqa: FBT001, FBT002 +) -> GridSearch: """A simple grid search algorithm which discretizes the search space and evaluates all possible configurations. Args: pipeline_space: The search space to sample from. + ignore_fidelity: Whether to ignore fidelity when sampling. + In this case, the max fidelity is always used. """ from neps.optimizers.utils.grid import make_grid - return GridSearch(configs_list=make_grid(pipeline_space)) + if any( + parameter.prior is not None for parameter in pipeline_space.searchables.values() + ): + raise ValueError("Grid search does not support priors.") + if ignore_fidelity and pipeline_space.fidelity is None: + logger.warning( + "Warning: You are using ignore_fidelity, but no fidelity is defined in the" + " search space. Consider setting ignore_fidelity to False." + ) + + return GridSearch( + configs_list=make_grid(pipeline_space, ignore_fidelity=ignore_fidelity) + ) def ifbo( @@ -440,13 +519,32 @@ def ifbo( """ from neps.optimizers.ifbo import _adjust_space_to_match_stepsize + if pipeline_space.fidelity is None: + raise ValueError("Fidelity is required for IFBO.") + # TODO: I'm not sure how this might effect tables, whose lowest fidelity # might be below to possibly increased lower bound. 
space, fid_bins = _adjust_space_to_match_stepsize(pipeline_space, step_size) - assert space.fidelity is not None - fidelity_name, fidelity = space.fidelity parameters = space.searchables + if use_priors and not any( + parameter.prior is not None for parameter in parameters.values() + ): + logger.warning( + "Warning: You are using priors, but no priors are defined in the search" + " space. Consider setting use_priors to False." + ) + + if not use_priors and any( + parameter.prior is not None for parameter in parameters.values() + ): + priors = [ + parameter for parameter in parameters.values() if parameter.prior is not None + ] + raise ValueError( + f"To use priors, you must set use_priors=True. Got priors: {priors}" + ) + match initial_design_size: case "ndim": _initial_design_size = len(parameters) @@ -802,6 +900,11 @@ def priorband( `N` * `maximum_fidelity` worth of fidelity has been evaluated, proceed with bayesian optimization when sampling a new configuration. """ + if not any(parameter.prior is not None for parameter in space.searchables.values()): + logger.warning( + "Warning: No priors are defined in the search space, priorband will sample" + " uniformly. Consider using hyperband instead." + ) return _bracket_optimizer( pipeline_space=space, bracket_type=base, @@ -819,6 +922,7 @@ def bayesian_optimization( *, initial_design_size: int | Literal["ndim"] = "ndim", cost_aware: bool | Literal["log"] = False, + ignore_fidelity: bool = False, device: torch.device | str | None = None, ) -> BayesianOptimization: """Models the relation between hyperparameters in your `pipeline_space` @@ -859,8 +963,34 @@ def bayesian_optimization( If using `cost`, cost must be provided in the reports of the trials. + ignore_fidelity: Whether to ignore the fidelity parameter when sampling. + In this case, the max fidelity is always used. device: Device to use for the optimization. """ + + if not ignore_fidelity and space.fidelity is not None: + raise ValueError( + "Fidelities are not supported for BayesianOptimization. Consider setting the" + " fidelity to a constant value or ignoring it using ignore_fidelity to" + f" always sample at max fidelity. Got fidelity: {space.fidelities} " + ) + if ignore_fidelity and space.fidelity is None: + logger.warning( + "Warning: You are using ignore_fidelity, but no fidelity is defined in the" + " search space. Consider setting ignore_fidelity to False." + ) + + if any(parameter.prior is not None for parameter in space.searchables.values()): + priors = [ + parameter + for parameter in space.searchables.values() + if parameter.prior is not None + ] + raise ValueError( + "Bayesian optimization does not support priors. Consider using pibo instead." + f" Got priors: {priors}" + ) + return _bo( pipeline_space=space, initial_design_size=initial_design_size, @@ -868,6 +998,7 @@ def bayesian_optimization( device=device, use_priors=False, sample_prior_first=False, + ignore_fidelity=ignore_fidelity, ) @@ -878,6 +1009,7 @@ def pibo( cost_aware: bool | Literal["log"] = False, device: torch.device | str | None = None, sample_prior_first: bool = False, + ignore_fidelity: bool = False, ) -> BayesianOptimization: """A modification of [`bayesian_optimization`][neps.optimizers.algorithms.bayesian_optimization] @@ -899,15 +1031,27 @@ def pibo( cost_aware: Whether to consider reported "cost" from configurations in decision making. If True, the optimizer will weigh potential candidates by how much they cost, incentivising the optimizer to explore cheap, good performing - configurations. 
This amount is modified over time. If "log", the cost - will be log-transformed before being used. - - !!! warning - - If using `cost`, cost must be provided in the reports of the trials. + configurations. This amount is modified over time. If "log", the cost will be + log-transformed before being used. + !!! warning + If using `cost`, cost must be provided in the reports of the trials. device: Device to use for the optimization. + sample_prior_first: Whether to sample the prior configuration first. + ignore_fidelity: Whether to ignore the fidelity parameter when sampling. + In this case, the max fidelity is always used. """ + if not any(parameter.prior is not None for parameter in space.searchables.values()): + logger.warning( + "Warning: PiBO was called without any priors - using uniform priors on all" + " parameters.\nConsider using Bayesian Optimization instead." + ) + if ignore_fidelity and space.fidelity is None: + logger.warning( + "Warning: You are using ignore_fidelity, but no fidelity is defined in the" + " search space. Consider setting ignore_fidelity to False." + ) + return _bo( pipeline_space=space, initial_design_size=initial_design_size, @@ -915,6 +1059,7 @@ def pibo( device=device, use_priors=True, sample_prior_first=sample_prior_first, + ignore_fidelity=ignore_fidelity, ) diff --git a/neps/optimizers/bayesian_optimization.py b/neps/optimizers/bayesian_optimization.py index ec556803d..f29de0a60 100644 --- a/neps/optimizers/bayesian_optimization.py +++ b/neps/optimizers/bayesian_optimization.py @@ -79,14 +79,20 @@ class BayesianOptimization: device: torch.device | None """The device to use for the optimization.""" - def __call__( + def __call__( # noqa: C901, PLR0912 self, trials: Mapping[str, Trial], budget_info: BudgetInfo | None = None, n: int | None = None, ) -> SampledConfig | list[SampledConfig]: - assert self.space.fidelity is None, "Fidelity not supported yet." 
- parameters = self.space.searchables + # If fidelities exist, sample from them as normal + # This is a bit of a hack, as we set them to max fidelity + # afterwards, but we need the complete space to sample + + if self.space.fidelity is not None: + parameters = {**self.space.searchables, **self.space.fidelities} + else: + parameters = {**self.space.searchables} n_to_sample = 1 if n is None else n n_sampled = len(trials) @@ -117,6 +123,10 @@ def __call__( design_samples = design_samples[n_evaluated:] for sample in design_samples: sample.update(self.space.constants) + if self.space.fidelity is not None: + sample.update( + {key: value.upper for key, value in self.space.fidelities.items()} + ) sampled_configs.extend( [ @@ -193,6 +203,10 @@ def __call__( configs = encoder.decode(candidates) for config in configs: config.update(self.space.constants) + if self.space.fidelity is not None: + config.update( + {key: value.upper for key, value in self.space.fidelities.items()} + ) sampled_configs.extend( [ diff --git a/neps/optimizers/random_search.py b/neps/optimizers/random_search.py index 5b6742a6a..376baa782 100644 --- a/neps/optimizers/random_search.py +++ b/neps/optimizers/random_search.py @@ -33,6 +33,14 @@ def __call__( config_dicts = self.encoder.decode(configs) for config in config_dicts: config.update(self.space.constants) + if self.space.fidelity is not None: + config.update( + { + key: value.upper + for key, value in self.space.fidelities.items() + if key not in config + } + ) if n is None: config = config_dicts[0] diff --git a/neps/optimizers/utils/grid.py b/neps/optimizers/utils/grid.py index 7b7fbef5b..720dd7713 100644 --- a/neps/optimizers/utils/grid.py +++ b/neps/optimizers/utils/grid.py @@ -12,6 +12,7 @@ def make_grid( space: SearchSpace, *, size_per_numerical_hp: int = 10, + ignore_fidelity: bool = True, ) -> list[dict[str, Any]]: """Get a grid of configurations from the search space. @@ -38,7 +39,7 @@ def make_grid( case Constant(): param_ranges[name] = [hp.value] case Integer() | Float(): - if hp.is_fidelity: + if hp.is_fidelity and ignore_fidelity: param_ranges[name] = [hp.upper] continue @@ -53,7 +54,6 @@ def make_grid( param_ranges[name] = uniq_values case _: raise NotImplementedError(f"Unknown Parameter type: {type(hp)}\n{hp}") - values = product(*param_ranges.values()) keys = list(space.keys()) diff --git a/neps/state/pipeline_eval.py b/neps/state/pipeline_eval.py index 8d4cce0eb..7903fec18 100644 --- a/neps/state/pipeline_eval.py +++ b/neps/state/pipeline_eval.py @@ -63,7 +63,7 @@ class UserResultDict(TypedDict, total=False): Order is important. * `Exception`: The evaluation failed. * `Mapping[str, Any]`: A dict which follows the layout of - [`UserResultDict`][neps.state.pipeline_eval] + [`UserResultDict`][neps.state.pipeline_eval.EvaluatePipelineReturn]. """ @@ -72,7 +72,8 @@ class UserResult: """The parsed values out of the possibilities the user can return from the `evaluate_pipeline_function()`. - See [`UserResultDict`][neps.state.pipeline_eval] for the possibilities. + See [`UserResultDict`][neps.state.pipeline_eval.EvaluatePipelineReturn] for the + possibilities. """ objective_to_minimize: float | list[float] | None = None diff --git a/neps_examples/README.md b/neps_examples/README.md index bc4487495..b1fc604cd 100644 --- a/neps_examples/README.md +++ b/neps_examples/README.md @@ -1,13 +1,11 @@ # Overview 1. **Basic usage examples** demonstrate fundamental usage. 
-Learn how to perform Hyperparameter Optimization (HPO), Neural Architecture Search (NAS), and Joint Architecture and Hyperparameter Search (JAHS).
-Understand how to analyze runs on a basic level.
+Learn how to perform Hyperparameter Optimization (HPO) and analyze runs on a basic level.

-2. **Efficiency examples** showcase how to enhance efficiency in NePS.
-Learn about expert priors, multi-fidelity, and parallelization to streamline your pipeline and optimize search processes.
+2. **Convenience examples** show tensorboard compatibility and integration, SLURM scripting, and file management within the evaluate pipeline function used in NePS.

-3. **Convenience examples** show tensorboard compatibility and its integration, explore the compatibility with PyTorch Lightning, see the declarative API, understand file management within the evaluate pipeline function used in NePS.
+3. **Efficiency examples** showcase how to enhance efficiency in NePS. Learn about expert priors, multi-fidelity, and parallelization to streamline your pipeline and optimize search processes.

 4. **Experimental examples** tailored for NePS contributors. These examples provide insights and practices for experimental scenarios.

diff --git a/neps_examples/basic_usage/hyperparameters.py b/neps_examples/basic_usage/hyperparameters.py
index 3940f2050..6b736fcd3 100644
--- a/neps_examples/basic_usage/hyperparameters.py
+++ b/neps_examples/basic_usage/hyperparameters.py
@@ -1,9 +1,11 @@
 import logging
-
 import numpy as np
-
 import neps
 
+# This example demonstrates how to use NePS to optimize hyperparameters
+# of a pipeline. The pipeline is a simple function that takes in
+# five hyperparameters and returns their negated sum.
+# NePS uses the default optimizer to minimize this objective function.
 
 def evaluate_pipeline(float1, float2, categorical, integer1, integer2):
     objective_to_minimize = -float(
@@ -26,5 +28,5 @@ def evaluate_pipeline(float1, float2, categorical, integer1, integer2):
     pipeline_space=pipeline_space,
     root_directory="results/hyperparameters_example",
     post_run_summary=True,
-    max_evaluations_total=15,
+    max_evaluations_total=30,
 )
diff --git a/neps_examples/convenience/declarative_usage/README.md b/neps_examples/convenience/declarative_usage/README.md
deleted file mode 100644
index 5ebbb2425..000000000
--- a/neps_examples/convenience/declarative_usage/README.md
+++ /dev/null
@@ -1,32 +0,0 @@
-# Declarative Usage in NePS for Neural Network Optimization
-
-This folder contains examples and templates for optimizing neural networks using NePS, configured via YAML files.
-These configurations allow easy adjustments to experiment parameters and search spaces, enabling fine-tuning of your
-models without modifying Python code.
-### `hpo_example.py`
-
-This Python script demonstrates how to integrate NePS with a neural network training pipeline for hyperparameter
-optimization. It utilizes a YAML configuration file to set up and run the experiments.
-
-```python
---8<-- "neps_examples/declarative_usage/hpo_example.py"
-```
-
-### `config.yaml`
-
-This YAML file defines the NePS arguments for the experiment. By editing this file, users can customize their
-experiments without modifying the Python script.
-
-```yaml
---8<-- "neps_examples/declarative_usage/config.yaml"
-```
-
-## Quick Start Guide
-
-1. **Review the YAML File:** Examine `config.yaml` to understand the available configurations and how they are structured.
-2. 
**Run the Example Script:** Execute hpo_example.py, by providing `config.yaml` via the run_args agrument to NePS. - This will initiate a hyperparameter optimization task based on your YAML configurations. -3. **Modify YAML File:** Experiment with adjusting the parameters in the YAML file to see how changes affect your - search experiments. This is a great way to learn about the impact of different configurations on your results. - -By following these steps and utilizing the provided YAML files, you'll be able to efficiently set up, run, and modify your NePS experiments. Enjoy the flexibility and simplicity that comes with managing your experiment configurations in YAML! diff --git a/neps_examples/convenience/declarative_usage/config.yaml b/neps_examples/convenience/declarative_usage/config.yaml deleted file mode 100644 index 858eb6e52..000000000 --- a/neps_examples/convenience/declarative_usage/config.yaml +++ /dev/null @@ -1,25 +0,0 @@ -experiment: - root_directory: "results/example_run" - max_evaluations_total: 20 - overwrite_working_directory: true - post_run_summary: true - -pipeline_space: - epochs: 5 - learning_rate: - lower: 1e-5 - upper: 1e-1 - log: true - num_layers: - lower: 1 - upper: 5 - optimizer: - choices: ["adam", "sgd"] - num_neurons: - lower: 64 - upper: 128 - -optimizer: - name: "bayesian_optimization" - initial_design_size: 5 - surrogate_model: gp diff --git a/neps_examples/convenience/declarative_usage/hpo_example.py b/neps_examples/convenience/declarative_usage/hpo_example.py deleted file mode 100644 index 0e599440b..000000000 --- a/neps_examples/convenience/declarative_usage/hpo_example.py +++ /dev/null @@ -1,127 +0,0 @@ -import logging - -import torch -import torch.nn as nn -import torch.optim as optim -from torch.utils.data import DataLoader -from torchvision import datasets, transforms - -import neps - -""" -This script demonstrates the integration of a simple neural network training pipeline -with NePS for hyperparameter optimization, focusing on the MNIST dataset. - -- SimpleNN Class: A PyTorch neural network model that constructs a feedforward - architecture based on input size, number of layers, and neurons per layer. - -- Training Pipeline: A function that takes hyperparameters (number of layers, neurons, - epochs, learning rate, optimizer type) to train and validate the SimpleNN model on - the MNIST dataset. Supports Adam and SGD optimizers. - -- NEPS Integration: Shows how to automate hyperparameter tuning using NEPS. Configuration - settings are specified in a YAML file ('run_args.yaml'), which is passed to the NePS - optimization process via the `run_args` parameter. - -Usage: -1. Define model architecture and training logic in `SimpleNN` and `training_pipeline`. -2. Configure hyperparameters and optimization settings in 'config.yaml'. -3. Launch optimization with NePS by calling `neps.run`, specifying the training pipeline, -and configuration file(config.yaml). 
-""" - - -class SimpleNN(nn.Module): - def __init__(self, input_size, num_layers, num_neurons): - super().__init__() - layers = [nn.Flatten()] - - for _ in range(num_layers): - layers.append(nn.Linear(input_size, num_neurons)) - layers.append(nn.ReLU()) - input_size = num_neurons # Set input size for the next layer - - layers.append(nn.Linear(num_neurons, 10)) # Output layer for 10 classes - self.model = nn.Sequential(*layers) - - def forward(self, x): - return self.model(x) - - -def training_pipeline(num_layers, num_neurons, epochs, learning_rate, optimizer): - """ - Trains and validates a simple neural network on the MNIST dataset. - - Args: - num_layers (int): Number of hidden layers in the network. - num_neurons (int): Number of neurons in each hidden layer. - epochs (int): Number of training epochs. - learning_rate (float): Learning rate for the optimizer. - optimizer (str): Name of the optimizer to use ('adam' or 'sgd'). - - Returns: - float: The average objective_to_minimize over the validation set after training. - - Raises: - KeyError: If the specified optimizer is not supported. - """ - # Transformations applied on each image - transform = transforms.Compose( - [ - transforms.ToTensor(), - transforms.Normalize( - (0.1307,), (0.3081,) - ), # Mean and Std Deviation for MNIST - ] - ) - - # Loading MNIST dataset - dataset = datasets.MNIST( - root="./data", train=True, download=True, transform=transform - ) - train_set, val_set = torch.utils.data.random_split(dataset, [55000, 5000]) - train_loader = DataLoader(train_set, batch_size=64, shuffle=True) - val_loader = DataLoader(val_set, batch_size=1000, shuffle=False) - - model = SimpleNN(28 * 28, num_layers, num_neurons) - criterion = nn.CrossEntropyLoss() - - # Select optimizer - if optimizer == "adam": - optimizer = optim.Adam(model.parameters(), lr=learning_rate) - elif optimizer == "sgd": - optimizer = optim.SGD(model.parameters(), lr=learning_rate) - else: - raise KeyError(f"optimizer {optimizer} is not available") - - # Training loop - - for epoch in range(epochs): - model.train() - for batch_idx, (data, target) in enumerate(train_loader): - optimizer.zero_grad() - output = model(data) - objective_to_minimize = criterion(output, target) - objective_to_minimize.backward() - optimizer.step() - - # Validation loop - model.eval() - val_objective_to_minimize = 0 - with torch.no_grad(): - for data, target in val_loader: - output = model(data) - val_objective_to_minimize += criterion(output, target).item() - - val_objective_to_minimize /= len(val_loader.dataset) - return val_objective_to_minimize - - -if __name__ == "__main__": - # Configure logging to display important messages from NePS. - logging.basicConfig(level=logging.INFO) - - # Run optimization using neps.run(...). Arguments can be provided directly to neps.run - # or defined in a configuration file (e.g., "config.yaml") passed through - # the run_args parameter. - neps.run(training_pipeline, run_args="config.yaml") diff --git a/neps_examples/convenience/neps_tblogger_tutorial.py b/neps_examples/convenience/neps_tblogger_tutorial.py index 48dee50e1..fd9bc8144 100644 --- a/neps_examples/convenience/neps_tblogger_tutorial.py +++ b/neps_examples/convenience/neps_tblogger_tutorial.py @@ -60,7 +60,7 @@ #5 Design the evaluate pipeline function. #6 Use neps.run the run the entire search using your specified optimizer. 
-Each step will be covered in detail thourghout the code
+Each step will be covered in detail throughout the code
 
 """
 
@@ -85,10 +85,10 @@ def MNIST(
 ) -> Tuple[DataLoader, DataLoader, DataLoader]:
     # Download MNIST training and test datasets if not already downloaded.
     train_dataset = torchvision.datasets.MNIST(
-        root="./data", train=True, transform=transforms.ToTensor(), download=True
+        root=".data", train=True, transform=transforms.ToTensor(), download=True
     )
     test_dataset = torchvision.datasets.MNIST(
-        root="./data", train=False, transform=transforms.ToTensor(), download=True
+        root=".data", train=False, transform=transforms.ToTensor(), download=True
    )

     # Determine the size of the reduced training dataset for faster training
@@ -325,7 +325,7 @@ def evaluate_pipeline(lr, optim, weight_decay):
     run bayesian optimization with 3 evaluations total.

     ```bash
-    python neps_tblogger_tutorial.py
+    python neps_examples/convenience/neps_tblogger_tutorial.py
     ```
     """
     start_time = time.time()
@@ -349,9 +349,10 @@ def evaluate_pipeline(lr, optim, weight_decay):
     To check live plots during this search, please open a new terminal
     and make sure to be at the same level directory of your project and
     run the following command on the file created by neps root_directory.
+    Running both from the root directory, the command would be:

     ```bash:
-    tensorboard --logdir output
+    tensorboard --logdir results/neps_tblogger_example
     ```

     To be able to check the visualization of tensorboard make sure to
diff --git a/neps_examples/convenience/working_directory_per_pipeline.py b/neps_examples/convenience/working_directory_per_pipeline.py
index cedde75bc..de2ec9fd9 100644
--- a/neps_examples/convenience/working_directory_per_pipeline.py
+++ b/neps_examples/convenience/working_directory_per_pipeline.py
@@ -11,8 +11,8 @@ def evaluate_pipeline(pipeline_directory: Path, float1, categorical, integer1):
     # When adding pipeline_directory to evaluate_pipeline, neps detects its presence and
     # passes a directory unique for each pipeline configuration. You can then use this
     # pipeline_directory to create / save files pertaining to a specific pipeline, e.g.:
-    weight_file = pipeline_directory / "weight_file.txt"
-    weight_file.write_text("0")
+    pipeline_info = pipeline_directory / "info_file.txt"
+    pipeline_info.write_text(f"{float1} - {categorical} - {integer1}")
     objective_to_minimize = -float(np.sum([float1, int(categorical), integer1]))
     return objective_to_minimize
diff --git a/neps_examples/efficiency/expert_priors_for_hyperparameters.py b/neps_examples/efficiency/expert_priors_for_hyperparameters.py
index a78dad043..5980668a5 100644
--- a/neps_examples/efficiency/expert_priors_for_hyperparameters.py
+++ b/neps_examples/efficiency/expert_priors_for_hyperparameters.py
@@ -1,6 +1,5 @@
 import logging
 import time
-from warnings import warn
 
 import neps
 
diff --git a/neps_examples/efficiency/multi_fidelity.py b/neps_examples/efficiency/multi_fidelity.py
index 326e2f286..c85d411e1 100644
--- a/neps_examples/efficiency/multi_fidelity.py
+++ b/neps_examples/efficiency/multi_fidelity.py
@@ -1,5 +1,4 @@
 import logging
-from warnings import warn
 
 import numpy as np
 from pathlib import Path
@@ -42,6 +41,7 @@ def get_model_and_optimizer(learning_rate):
 # Important: Include the "pipeline_directory" and "previous_pipeline_directory" arguments
 # in your evaluate_pipeline function. This grants access to NePS's folder system and is
 # critical for leveraging efficient multi-fidelity optimization strategies.
+# For more details, refer to the working_directory_per_pipeline example in convenience. def evaluate_pipeline( diff --git a/neps_examples/efficiency/multi_fidelity_and_expert_priors.py b/neps_examples/efficiency/multi_fidelity_and_expert_priors.py index f056e95dc..96f7b2b3e 100644 --- a/neps_examples/efficiency/multi_fidelity_and_expert_priors.py +++ b/neps_examples/efficiency/multi_fidelity_and_expert_priors.py @@ -1,10 +1,10 @@ import logging -from warnings import warn import numpy as np - import neps +# This example demonstrates NePS uses both fidelity and expert priors to +# optimize hyperparameters of a pipeline. def evaluate_pipeline(float1, float2, integer1, fidelity): objective_to_minimize = -float(np.sum([float1, float2, integer1])) / fidelity diff --git a/neps_examples/efficiency/pytorch_lightning_ddp.py b/neps_examples/efficiency/pytorch_lightning_ddp.py index 4b387ed43..96b620de1 100644 --- a/neps_examples/efficiency/pytorch_lightning_ddp.py +++ b/neps_examples/efficiency/pytorch_lightning_ddp.py @@ -1,12 +1,14 @@ +import logging + import lightning as L import torch import torch.nn as nn import torch.nn.functional as F from torch.utils.data import DataLoader, random_split import neps -import logging - -NUM_GPU = 8 # Number of GPUs to use for DDP + +NUM_GPU = 8 # Number of GPUs to use for DDP + class ToyModel(nn.Module): """ Taken from https://pytorch.org/tutorials/intermediate/ddp_tutorial.html """ @@ -18,13 +20,13 @@ def __init__(self): def forward(self, x): return self.net2(self.relu(self.net1(x))) - + class LightningModel(L.LightningModule): def __init__(self, lr): super().__init__() self.lr = lr self.model = ToyModel() - + def training_step(self, batch, batch_idx): x, y = batch y_hat = self.model(x) @@ -38,17 +40,17 @@ def validation_step(self, batch, batch_idx): loss = F.mse_loss(y_hat, y) self.log("val_loss", loss, prog_bar=True, sync_dist=True) return loss - + def test_step(self, batch, batch_idx): x, y = batch y_hat = self.model(x) loss = F.mse_loss(y_hat, y) self.log("test_loss", loss, prog_bar=True, sync_dist=True) return loss - + def configure_optimizers(self): return torch.optim.SGD(self.parameters(), lr=self.lr) - + def evaluate_pipeline(lr=0.1, epoch=20): L.seed_everything(42) # Model @@ -59,7 +61,7 @@ def evaluate_pipeline(lr=0.1, epoch=20): labels = torch.rand((1000, 5)) dataset = list(zip(data, labels)) - + train_dataset, val_dataset, test_dataset = random_split(dataset, [600, 200, 200]) # Define simple data loaders using tensors and slicing @@ -76,7 +78,8 @@ def evaluate_pipeline(lr=0.1, epoch=20): ) trainer.fit(model, train_dataloader, val_dataloader) trainer.validate(model, test_dataloader) - return trainer.logged_metrics["val_loss"] + return trainer.logged_metrics["val_loss"].item() + pipeline_space = dict( lr=neps.Float( @@ -98,4 +101,4 @@ def evaluate_pipeline(lr=0.1, epoch=20): pipeline_space=pipeline_space, root_directory="results/pytorch_lightning_ddp", max_evaluations_total=5 - ) \ No newline at end of file + ) diff --git a/neps_examples/efficiency/pytorch_lightning_fsdp.py b/neps_examples/efficiency/pytorch_lightning_fsdp.py index 2fa222a0c..6af3d6746 100644 --- a/neps_examples/efficiency/pytorch_lightning_fsdp.py +++ b/neps_examples/efficiency/pytorch_lightning_fsdp.py @@ -1,7 +1,8 @@ -# Based on: https://lightning.ai/docs/pytorch/stable/advanced/model_parallel/fsdp.html +"""Based on: https://lightning.ai/docs/pytorch/stable/advanced/model_parallel/fsdp.html + +Mind that this example does not run on Windows at the moment.""" import torch -import 
torch.nn as nn import torch.nn.functional as F from torch.utils.data import DataLoader @@ -54,12 +55,12 @@ def evaluate_pipeline(lr=0.1, epoch=20): import logging logging.basicConfig(level=logging.INFO) - + pipeline_space = dict( lr=neps.Float( - lower=0.0001, - upper=0.1, - log=True, + lower=0.0001, + upper=0.1, + log=True, prior=0.01 ), epoch=neps.Integer( @@ -70,8 +71,8 @@ def evaluate_pipeline(lr=0.1, epoch=20): ) neps.run( - evaluate_pipeline=evaluate_pipeline, - pipeline_space=pipeline_space, - root_directory="results/pytorch_lightning_fsdp", + evaluate_pipeline=evaluate_pipeline, + pipeline_space=pipeline_space, + root_directory="results/pytorch_lightning_fsdp", max_evaluations_total=5 ) diff --git a/neps_examples/efficiency/pytorch_native_ddp.py b/neps_examples/efficiency/pytorch_native_ddp.py index 6d0a76d0a..9ced5dc86 100644 --- a/neps_examples/efficiency/pytorch_native_ddp.py +++ b/neps_examples/efficiency/pytorch_native_ddp.py @@ -1,4 +1,6 @@ -""" Some parts of this code are taken from https://pytorch.org/tutorials/intermediate/ddp_tutorial.html """ +""" Some parts of this code are taken from https://pytorch.org/tutorials/intermediate/ddp_tutorial.html + +Mind that this example does not run on Windows at the moment.""" import os import sys @@ -14,7 +16,7 @@ import neps import logging -NUM_GPU = 8 # Number of GPUs to use for DDP +NUM_GPU = 8 # Number of GPUs to use for DDP # On Windows platform, the torch.distributed package only # supports Gloo backend, FileStore and TcpStore. @@ -28,6 +30,7 @@ # world_size=world_size) # For TcpStore, same way as on Linux. + def setup(rank, world_size): os.environ['MASTER_ADDR'] = 'localhost' os.environ['MASTER_PORT'] = '12355' @@ -35,6 +38,7 @@ def setup(rank, world_size): # initialize the process group dist.init_process_group("gloo", rank=rank, world_size=world_size) + def cleanup(): dist.destroy_process_group() @@ -62,7 +66,7 @@ def demo_basic(rank, world_size, loss_dict, learning_rate, epochs): loss_fn = nn.MSELoss() optimizer = optim.SGD(ddp_model.parameters(), lr=learning_rate) - + total_loss = 0.0 for epoch in range(epochs): optimizer.zero_grad() @@ -75,28 +79,29 @@ def demo_basic(rank, world_size, loss_dict, learning_rate, epochs): if rank == 0: print(f"Epoch {epoch} complete") - + loss_dict[rank] = total_loss - + cleanup() print(f"Finished running basic DDP example on rank {rank}.") -def run_pipeline(learning_rate, epochs): +def evaluate_pipeline(learning_rate, epochs): from torch.multiprocessing import Manager world_size = NUM_GPU # Number of GPUs - + manager = Manager() loss_dict = manager.dict() - + mp.spawn(demo_basic, args=(world_size, loss_dict, learning_rate, epochs), nprocs=world_size, join=True) - + loss = sum(loss_dict.values()) // world_size return {'loss': loss} + pipeline_space = dict( learning_rate=neps.Float(lower=10e-7, upper=10e-3, log=True), epochs=neps.Integer(lower=1, upper=3) @@ -104,7 +109,7 @@ def run_pipeline(learning_rate, epochs): if __name__ == '__main__': logging.basicConfig(level=logging.INFO) - neps.run(run_pipeline=run_pipeline, + neps.run(evaluate_pipeline=evaluate_pipeline, pipeline_space=pipeline_space, - root_directory="results/pytorch_ddp", - max_evaluations_total=25) \ No newline at end of file + root_directory="results/pytorch_ddp", + max_evaluations_total=25) diff --git a/neps_examples/efficiency/pytorch_native_fsdp.py b/neps_examples/efficiency/pytorch_native_fsdp.py index dfa692be5..1fec7bef3 100644 --- a/neps_examples/efficiency/pytorch_native_fsdp.py +++ 
b/neps_examples/efficiency/pytorch_native_fsdp.py @@ -1,4 +1,6 @@ -# Based on: https://github.com/pytorch/examples/blob/master/mnist/main.py +"""Based on: https://github.com/pytorch/examples/blob/master/mnist/main.py + +Mind that this example does not run on Windows at the moment.""" import math import os @@ -33,7 +35,7 @@ def setup(rank, world_size): def cleanup(): dist.destroy_process_group() - + class Net(nn.Module): def __init__(self): super(Net, self).__init__() @@ -59,7 +61,7 @@ def forward(self, x): x = self.fc2(x) output = F.log_softmax(x, dim=1) return output - + def train(model, rank, world_size, train_loader, optimizer, epoch, sampler=None): model.train() ddp_loss = torch.zeros(2).to(rank) @@ -78,7 +80,7 @@ def train(model, rank, world_size, train_loader, optimizer, epoch, sampler=None) dist.all_reduce(ddp_loss, op=dist.ReduceOp.SUM) if rank == 0: print('Train Epoch: {} \tLoss: {:.6f}'.format(epoch, ddp_loss[0] / ddp_loss[1])) - + def test(model, rank, world_size, test_loader): model.eval() correct = 0 @@ -101,7 +103,7 @@ def test(model, rank, world_size, test_loader): test_loss, int(ddp_loss[1]), int(ddp_loss[2]), 100. * ddp_loss[1] / ddp_loss[2])) return test_loss - + def fsdp_main(rank, world_size, test_loss_tensor, lr, epochs, save_model=False): setup(rank, world_size) @@ -145,7 +147,7 @@ def fsdp_main(rank, world_size, test_loss_tensor, lr, epochs, save_model=False): scheduler = StepLR(optimizer, step_size=1, gamma=0.7) init_start_event.record() - + test_loss = math.inf for epoch in range(1, epochs + 1): train(model, rank, world_size, train_loader, optimizer, epoch, sampler=sampler1) @@ -156,7 +158,7 @@ def fsdp_main(rank, world_size, test_loss_tensor, lr, epochs, save_model=False): if rank == 0: test_loss_tensor[0] = test_loss - + init_end_event.record() if rank == 0: @@ -170,33 +172,33 @@ def fsdp_main(rank, world_size, test_loss_tensor, lr, epochs, save_model=False): if rank == 0: torch.save(states, "mnist_cnn.pt") cleanup() - + def evaluate_pipeline(lr=0.1, epoch=20): torch.manual_seed(42) - + test_loss_tensor = torch.zeros(1) test_loss_tensor.share_memory_() - + mp.spawn(fsdp_main, args=(NUM_GPU, test_loss_tensor, lr, epoch), nprocs=NUM_GPU, join=True) - + loss = test_loss_tensor.item() return loss - - + + if __name__ == "__main__": import neps import logging logging.basicConfig(level=logging.INFO) - + pipeline_space = dict( lr=neps.Float( - lower=0.0001, - upper=0.1, - log=True, + lower=0.0001, + upper=0.1, + log=True, prior=0.01 ), epoch=neps.Integer( @@ -207,8 +209,8 @@ def evaluate_pipeline(lr=0.1, epoch=20): ) neps.run( - evaluate_pipeline=evaluate_pipeline, - pipeline_space=pipeline_space, - root_directory="results/pytorch_fsdp", + evaluate_pipeline=evaluate_pipeline, + pipeline_space=pipeline_space, + root_directory="results/pytorch_fsdp", max_evaluations_total=20 ) diff --git a/neps_examples/experimental/freeze_thaw.py b/neps_examples/experimental/freeze_thaw.py index c75cdc346..597e1df3d 100644 --- a/neps_examples/experimental/freeze_thaw.py +++ b/neps_examples/experimental/freeze_thaw.py @@ -167,14 +167,14 @@ def training_pipeline( evaluate_pipeline=training_pipeline, optimizer="ifbo", max_evaluations_total=50, - root_directory="./debug/ifbo-mnist/", + root_directory="./results/ifbo-mnist/", overwrite_working_directory=False, # set to False for a multi-worker run ) # NOTE: this is `experimental` and may not work as expected ## plotting a 3D plot for learning curves explored by ifbo plotter = Plotter3D( - run_path="./debug/ifbo-mnist/", # same as 
`root_directory` above + run_path="./results/ifbo-mnist/", # same as `root_directory` above fidelity_key="epochs", # same as `pipeline_space` ) plotter.plot3D(filename="ifbo") diff --git a/neps_examples/real_world/README.md b/neps_examples/real_world/README.md index 16da65512..953db55ee 100644 --- a/neps_examples/real_world/README.md +++ b/neps_examples/real_world/README.md @@ -7,7 +7,7 @@ This example demonstrates how to perform hyperparameter optimization (HPO) for a We compare the performance of the optimized hyperparameters with the default hyperparameters. using the validation loss achieved on the dataset after training the model with the respective hyperparameters. ```bash -python3 image_segmentation_pipeline_hpo.py +python image_segmentation_pipeline_hpo.py ``` The search space has been set with the priors set to the hyperparameters found in this base example: https://lightning.ai/lightning-ai/studios/image-segmentation-with-pytorch-lightning @@ -22,8 +22,7 @@ We run the HPO process for 188 trials and obtain new set of hyperpamereters that | weight_decay | 0.0001 | 0.00012664785026572645 | - -![Validation Loss Curves](./val_loss_image_segmentation.png) +![Validation Loss Curves](../../doc_images/examples/val_loss_image_segmentation.jpg) The validation loss achieved on the dataset after training the model with the newly sampled hyperparameters is shown in the figure above. @@ -33,4 +32,4 @@ Validation Loss with Default Hyperparameters: 0.114094577729702 Validation Loss with Optimized Hyperparameters: 0.0997161939740181 -The optimized hyperparameters outperform the default hyperparameters by 12.61%. \ No newline at end of file +The optimized hyperparameters outperform the default hyperparameters by 12.61%. diff --git a/neps_examples/real_world/image_segmentation_hpo.py b/neps_examples/real_world/image_segmentation_hpo.py index 5abbf5183..2320f20f1 100644 --- a/neps_examples/real_world/image_segmentation_hpo.py +++ b/neps_examples/real_world/image_segmentation_hpo.py @@ -1,11 +1,11 @@ # Example pipeline used from; https://lightning.ai/lightning-ai/studios/image-segmentation-with-pytorch-lightning +import os + import torch from torchvision import transforms, datasets, models import lightning as L from lightning.pytorch.strategies import DDPStrategy -import os -from lightning.pytorch.utilities.rank_zero import rank_zero_only from torch.optim.lr_scheduler import PolynomialLR @@ -18,14 +18,14 @@ def __init__(self, iters_per_epoch, lr, momentum, weight_decay): self.lr = lr self.momentum = momentum self.weight_decay = weight_decay - + def training_step(self, batch): images, targets = batch outputs = self.model(images)['out'] loss = self.loss_fn(outputs, targets.long().squeeze(1)) self.log("train_loss", loss, sync_dist=True) return loss - + def validation_step(self, batch): images, targets = batch outputs = self.model(images)['out'] @@ -39,19 +39,19 @@ def configure_optimizers(self): optimizer, total_iters=self.iters_per_epoch * self.trainer.max_epochs, power=0.9 ) return [optimizer], [scheduler] - - - + + + class SegmentationData(L.LightningDataModule): def __init__(self, batch_size=4): super().__init__() self.batch_size = batch_size - + def prepare_data(self): - dataset_path = "data/VOCtrainval_11-May-2012.tar" + dataset_path = ".data/VOC/VOCtrainval_11-May-2012.tar" if not os.path.exists(dataset_path): - datasets.VOCSegmentation(root="data", download=True) - + datasets.VOCSegmentation(root=".data/VOC", download=True) + def train_dataloader(self): transform = transforms.Compose([ 
transforms.ToTensor(), @@ -59,9 +59,9 @@ def train_dataloader(self): transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) ]) target_transform = transforms.Compose([transforms.ToTensor(), transforms.Resize((256, 256), antialias=True)]) - train_dataset = datasets.VOCSegmentation(root="data", transform=transform, target_transform=target_transform) - return torch.utils.data.DataLoader(train_dataset, batch_size=self.batch_size, shuffle=True, num_workers=63) - + train_dataset = datasets.VOCSegmentation(root=".data/VOC", transform=transform, target_transform=target_transform) + return torch.utils.data.DataLoader(train_dataset, batch_size=self.batch_size, shuffle=True, num_workers=16, persistent_workers=True) + def val_dataloader(self): transform = transforms.Compose([ transforms.ToTensor(), @@ -69,15 +69,15 @@ def val_dataloader(self): transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) ]) target_transform = transforms.Compose([transforms.ToTensor(), transforms.Resize((256, 256), antialias=True)]) - val_dataset = datasets.VOCSegmentation(root="data", year='2012', image_set='val', transform=transform, target_transform=target_transform) - return torch.utils.data.DataLoader(val_dataset, batch_size=self.batch_size, shuffle=False, num_workers=63) + val_dataset = datasets.VOCSegmentation(root=".data/VOC", year='2012', image_set='val', transform=transform, target_transform=target_transform) + return torch.utils.data.DataLoader(val_dataset, batch_size=self.batch_size, shuffle=False, num_workers=16, persistent_workers=True) - -def main(**kwargs): +def evaluate_pipeline(**kwargs): data = SegmentationData(kwargs.get("batch_size", 4)) + data.prepare_data() iters_per_epoch = len(data.train_dataloader()) - model = LitSegmentation(iters_per_epoch, kwargs.get("lr", 0.02), kwargs.get("momentum", 0.9), kwargs.get("weight_decay", 1e-4)) + model = LitSegmentation(iters_per_epoch, kwargs.get("lr", 0.02), kwargs.get("momentum", 0.9), kwargs.get("weight_decay", 1e-4)) trainer = L.Trainer(max_epochs=kwargs.get("epoch", 30), strategy=DDPStrategy(find_unused_parameters=True), enable_checkpointing=False) trainer.fit(model, data) val_loss = trainer.logged_metrics["val_loss"].detach().item() @@ -87,26 +87,26 @@ def main(**kwargs): if __name__ == "__main__": import neps import logging - + logging.basicConfig(level=logging.INFO) - + # Search space for hyperparameters pipeline_space = dict( lr=neps.Float( - lower=0.0001, - upper=0.1, - log=True, + lower=0.0001, + upper=0.1, + log=True, prior=0.02 ), momentum=neps.Float( - lower=0.1, - upper=0.9, + lower=0.1, + upper=0.9, prior=0.5 ), weight_decay=neps.Float( - lower=1e-5, - upper=1e-3, - log=True, + lower=1e-5, + upper=1e-3, + log=True, prior=1e-4 ), epoch=neps.Integer( @@ -122,8 +122,8 @@ def main(**kwargs): ) neps.run( - evaluate_pipeline=main, - pipeline_space=pipeline_space, - root_directory="hpo_image_segmentation", + evaluate_pipeline=evaluate_pipeline, + pipeline_space=pipeline_space, + root_directory="results/hpo_image_segmentation", max_evaluations_total=500 ) diff --git a/neps_examples/real_world/val_loss_image_segmentation.png b/neps_examples/real_world/val_loss_image_segmentation.png deleted file mode 100644 index ecb7513f2..000000000 Binary files a/neps_examples/real_world/val_loss_image_segmentation.png and /dev/null differ diff --git a/pyproject.toml b/pyproject.toml index cf1961d9c..e58149371 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -42,10 +42,10 @@ classifiers = [ "Topic :: Scientific/Engineering :: 
Artificial Intelligence",
   "Topic :: System :: Distributed Computing",
 ]
-requires-python = ">=3.10,<3.13"
+requires-python = ">=3.10,<3.14"
 
 dependencies = [
-  "numpy>=1.0,<2.0",
+  "numpy>=2.0",
   "pandas>=2.0,<3.0",
   "networkx>=2.6.3,<3.0",
   "scipy>=1.13.1",
@@ -58,9 +58,9 @@ dependencies = [
   "tensorboard>=2.0,<3.0",
   "typing-extensions",
   "torchvision>=0.8.0",
-  "ifbo>=0.3.10",
   "botorch>=0.12",
   "gpytorch==1.13.0",
+  "ifbo",
 ]
 
 [project.urls]
@@ -327,3 +327,6 @@ replace = "version = \"v{new_version}\""
 filename = "CITATION.cff"
 search = "version: {current_version}"
 replace = "version: {new_version}"
+
+[tool.uv.sources]
+ifbo = { git = "https://github.com/automl/ifBO", rev = "53f1207" }
diff --git a/tests/test_state/test_neps_state.py b/tests/test_state/test_neps_state.py
index 57b6db946..92c5d2252 100644
--- a/tests/test_state/test_neps_state.py
+++ b/tests/test_state/test_neps_state.py
@@ -84,43 +84,35 @@ def case_search_space_fid_with_prior() -> SearchSpace:
     "multifidelity_tpe",
 ]
 
-OPTIMIZER_FAILS_WITH_FIDELITY = [
+# There's no programmatic way to check if a class requires or
+# doesn't support a fidelity/prior.
+# See issue #118, #119, #120
+# For now, keep these lists up to date manually and xfail the tests
+# that require a fidelity/prior.
+REQUIRES_FIDELITY = [
+    "successive_halving",
+    "asha",
+    "hyperband",
+    "async_hb",
+    "ifbo",
+    "priorband",
+]
+NO_DEFAULT_FIDELITY_SUPPORT = [
     "random_search",
-    "bayesian_optimization_cost_aware",
+    "grid_search",
     "bayesian_optimization",
-    "bayesian_optimization_prior",
     "pibo",
-    "cost_cooling_bayesian_optimization",
-    "cost_cooling",
 ]
-
-# There's no programattic way to check if a class requires a fidelity.
-# See issue #118, #119, #120
-OPTIMIZER_REQUIRES_FIDELITY = [
+NO_DEFAULT_PRIOR_SUPPORT = [
+    "grid_search",
+    "bayesian_optimization",
+    "ifbo",
     "successive_halving",
-    "successive_halving_prior",
     "asha",
-    "asha_prior",
     "hyperband",
-    "hyperband_prior",
     "async_hb",
-    "async_hb_prior",
-    "priorband",
-    "priorband_sh",
-    "priorband_asha",
-    "priorband_async",
-    "priorband_bo",
-    "bayesian_optimization_cost_aware",
-    "mobster",
-    "ifbo",
+    "random_search",
 ]
-REQUIRES_PRIOR = {
-    "priorband",
-    "priorband_bo",
-    "priorband_asha",
-    "priorband_asha_hyperband",
-}
-REQUIRES_COST = ["cost_cooling_bayesian_optimization", "cost_cooling"]
 
 
 @fixture
@@ -132,13 +124,15 @@ def optimizer_and_key_and_search_space(
     if key in JUST_SKIP:
         pytest.xfail(f"{key} is not instantiable")
 
-    if key in REQUIRES_PRIOR and search_space.searchables["a"].prior is None:
-        pytest.xfail(f"{key} requires a prior")
+    if key in NO_DEFAULT_PRIOR_SUPPORT and any(
+        parameter.prior is not None for parameter in search_space.searchables.values()
+    ):
+        pytest.xfail(f"{key} crashed with a prior")
 
-    if len(search_space.fidelities) > 0 and key in OPTIMIZER_FAILS_WITH_FIDELITY:
+    if search_space.fidelity is not None and key in NO_DEFAULT_FIDELITY_SUPPORT:
         pytest.xfail(f"{key} crashed with a fidelity")
 
-    if key in OPTIMIZER_REQUIRES_FIDELITY and not len(search_space.fidelities) > 0:
+    if key in REQUIRES_FIDELITY and search_space.fidelity is None:
         pytest.xfail(f"{key} requires a fidelity parameter")
 
     kwargs: dict[str, Any] = {}
@@ -171,10 +165,9 @@ def case_neps_state_filebased(
 def test_sample_trial(
     neps_state: NePSState,
     optimizer_and_key_and_search_space: tuple[AskFunction, str, SearchSpace],
+    capsys,
 ) -> None:
     optimizer, key, search_space = optimizer_and_key_and_search_space
-    if key in REQUIRES_COST and neps_state.lock_and_get_optimizer_state().budget is None:
-        pytest.xfail(f"{key} 
requires a cost budget") assert neps_state.lock_and_read_trials() == {} assert neps_state.lock_and_get_next_pending_trial() is None
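
For illustration, a minimal sketch of how the stricter fidelity handling in this patch looks from the user side. The toy `evaluate_pipeline`, the `results/ignore_fidelity_sketch` directory, and the `(name, kwargs)` tuple form used to forward `ignore_fidelity` are assumptions made for this sketch; only `neps.run`, `neps.Float`, `neps.Integer`, and `is_fidelity` are taken from the surrounding examples.

```python
import logging

import neps


# Toy objective, assumed only for this sketch: smaller learning-rate error and
# more epochs give a lower value.
def evaluate_pipeline(lr: float, epochs: int) -> float:
    return (lr - 0.01) ** 2 / epochs


pipeline_space = dict(
    lr=neps.Float(lower=1e-4, upper=1e-1, log=True),
    # A fidelity parameter: with the checks added in this patch, `random_search`
    # and `bayesian_optimization` reject such a space unless `ignore_fidelity`
    # is set ("highest fidelity" additionally pins it to its upper bound for
    # random search).
    epochs=neps.Integer(lower=1, upper=20, is_fidelity=True),
)

if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    neps.run(
        evaluate_pipeline=evaluate_pipeline,
        pipeline_space=pipeline_space,
        # Assumption: optimizer keyword arguments are forwarded via the
        # (name, kwargs) tuple form; with ignore_fidelity=True the fidelity is
        # sampled like any other parameter.
        optimizer=("random_search", {"ignore_fidelity": True}),
        root_directory="results/ignore_fidelity_sketch",
        max_evaluations_total=10,
    )
```

With `ignore_fidelity=False` the same call would raise the new `ValueError`, and adding a `prior=` to `lr` without also passing `use_priors=True` would likewise be rejected, matching the checks introduced above.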