From bf31c70cf9835728add2f4dc3d930429b6be991a Mon Sep 17 00:00:00 2001
From: herilalaina
Date: Mon, 25 Nov 2024 12:33:24 +0100
Subject: [PATCH 1/2] Bumping version to v0.3.11

---
 ifbo/version.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ifbo/version.py b/ifbo/version.py
index aa4cd15..91c67cc 100644
--- a/ifbo/version.py
+++ b/ifbo/version.py
@@ -1 +1 @@
-__version__ = "0.3.10"
+__version__ = "0.3.11"

From f10efede2ee8ffacedba04f960e0e197e4d7478b Mon Sep 17 00:00:00 2001
From: herilalaina
Date: Wed, 4 Jun 2025 12:12:28 +0200
Subject: [PATCH 2/2] add previous training code in train.py

---
 README.md                  |  12 +++-
 ifbo/priors/ftpfn_prior.py |  25 +++++++-
 ifbo/train.py              | 116 +++++++++++++++++++++++++++++++++++--
 3 files changed, 145 insertions(+), 8 deletions(-)

diff --git a/README.md b/README.md
index bd96edf..b87d040 100644
--- a/README.md
+++ b/README.md
@@ -7,13 +7,14 @@
 This repository contains the official code for our [ICML 2024 paper](https://openreview.net/forum?id=VyoY3Wh9Wd). `ifBO` is an efficient Bayesian Optimization algorithm that dynamically selects and incrementally evaluates candidates during the optimization process. It uses a model called the `Freeze-Thaw surrogate (FT-PFN)` to predict the performance of candidate configurations as more resources are allocated. The `main` branch includes the necessary API to use `FT-PFN`. Refer to the following sections:
 - [Surrogate API](#surrogate-api): to learn how to initialize and use the surrogate model.
 - [Bayesian Optimization with ifBO](#bayesian-optimization-with-ifbo): to understand how to use `ifBO` for Hyperparameter Optimization.
+- [Training your own model](#training-your-own-model): to understand the ifBO training pipeline.
 
 > To reproduce experiments from the above paper version, please refer to the branch [`icml-2024`](https://github.com/automl/ifBO/tree/icml-2024).
 
 # Installation
 
-Requires Python 3.11.
+Requires Python 3.11 or later.
 
 ```bash
 pip install -U ifBO
 ```
@@ -139,6 +140,15 @@ neps.run(
 )
 ```
 
+## Training your own model
+
+Train ifBO from scratch with the following command:
+
+```bash
+python -m ifbo.train --epochs 20 --output_path your_own_ifbo.model --seq_len 1000
+```
+
+For more training options, run `python -m ifbo.train -h` or inspect `ifbo/train.py`.
 
 # Citation
 
diff --git a/ifbo/priors/ftpfn_prior.py b/ifbo/priors/ftpfn_prior.py
index fa52ae1..e2d2a81 100644
--- a/ifbo/priors/ftpfn_prior.py
+++ b/ifbo/priors/ftpfn_prior.py
@@ -505,9 +505,30 @@ def forward(self, *x, **kwargs) -> torch.Tensor:
         )
         return out
 
 
+class MultiCurvesEncoderSeqLen(torch.nn.Module):
+    def __init__(self, in_dim: int, out_dim: int, seq_len: int) -> None:
+        super().__init__()
+        self.normalizer = torch.nn.Sequential(
+            encoders.Normalize(0.5, math.sqrt(1 / 12)),
+        )
+        self.epoch_enc = torch.nn.Linear(1, out_dim, bias=False)
+        self.idcurve_enc = torch.nn.Embedding(seq_len + 1, out_dim)
+        self.configuration_enc = encoders.get_variable_num_features_encoder(encoders.Linear)(
+            in_dim - 2, out_dim
+        )
+
+    def forward(self, *x, **kwargs) -> torch.Tensor:
+        x = torch.cat(x, dim=-1)
+        out = (
+            self.epoch_enc(self.normalizer(x[..., 1:2]))
+            + self.idcurve_enc(x[..., :1].int()).squeeze(2)
+            + self.configuration_enc(x[..., 2:])
+        )
+        return out
+
-def get_encoder() -> Callable[[int, int], torch.nn.Module]:
-    return lambda num_features, emsize: MultiCurvesEncoder(num_features, emsize)
+def get_encoder(seq_len: int) -> Callable[[int, int], torch.nn.Module]:
+    return lambda num_features, emsize: MultiCurvesEncoderSeqLen(num_features, emsize, seq_len)
 
 
 def sample_curves(
diff --git a/ifbo/train.py b/ifbo/train.py
index cbd5d81..4951c44 100755
--- a/ifbo/train.py
+++ b/ifbo/train.py
@@ -6,6 +6,7 @@
 import time
 from typing import Any
 
+import argparse
 import torch
 from torch import nn
 from torch.cuda.amp import autocast
@@ -13,15 +14,18 @@
 from tqdm import tqdm
 
 from ifbo import positional_encodings
-from ifbo import utils
+from ifbo import utils, encoders, bar_distribution
 from ifbo.bar_distribution import BarDistribution
 from ifbo.bar_distribution import get_custom_bar_dist
-from ifbo.priors import prior
+from ifbo.priors import prior, ftpfn_prior
+from ifbo.priors.utils import get_batch_to_dataloader
 from ifbo.transformer import TransformerModel
 from ifbo.utils import get_cosine_schedule_with_warmup
 from ifbo.utils import get_openai_lr
 from ifbo.utils import init_dist
+from ifbo.utils import default_device
+
 
 class Losses:
     def get_cross_entropy_loss(self, num_classes: int) -> nn.CrossEntropyLoss:
@@ -205,8 +209,8 @@ def train_epoch() -> tuple[float, list[float], float, float, float, float, float
         total_loss = 0.0
         total_positional_losses = torch.zeros(bptt)
         total_positional_losses_recorded = torch.zeros(bptt)
-        nan_steps = torch.zeros(1)
-        ignore_steps = torch.zeros(1)
+        nan_steps = torch.zeros(1).to(device)
+        ignore_steps = torch.zeros(1).to(device)
         before_get_batch = time.time()
         assert (
             len(dl) % aggregate_k_gradients == 0
@@ -384,7 +388,7 @@ def apply_batch_wise_criterion(i: int) -> torch.Tensor:
                 }
                 if step_callback is not None and rank == 0:
                     step_callback(metrics_to_log)
-                nan_steps += nan_share
+                nan_steps += nan_share.detach()
                 ignore_steps += (targets == -100).float().mean()
             except Exception as e:
                 print("Invalid step encountered, skipping...")
@@ -459,3 +463,105 @@ def apply_batch_wise_criterion(i: int) -> torch.Tensor:
         return total_loss, total_positional_losses, model.to("cpu"), dl
 
     return None
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Train an ifBO model")
+
+    # transformer model parameters
+    parser.add_argument("--nlayers", type=int, help="Number of layers", default=6)
+    parser.add_argument("--emsize", type=int, default=512, help="Size of Embeddings")
+    parser.add_argument("--nhead", type=int, default=4, help="Number of heads")
+
+    # PFN parameters
+    parser.add_argument(
+        "--num_borders",
+        type=int,
+        default=1000,
+        help="Number of borders considered in Bar distribution",
+    )
+
+    # Prior parameters
+    parser.add_argument("--seq_len", type=int, required=True, help="Maximum sequence length")
+    parser.add_argument(
+        "--num_features",
+        type=int,
+        required=False,
+        help="The total number of features for each datapoint in an example.",
+        default=12, # has to be at least 3
+    )
+    parser.add_argument(
+        "--power_single_eval_pos_sampler",
+        type=int,
+        required=False,
+        help="Power of an exponential distribution to weight sampling of single eval pos.",
+        default=-2,
+    )
+
+    # training parameters
+    parser.add_argument("--epochs", type=int, required=True, help="Number of Training Epochs")
+    parser.add_argument("--batch_size", type=int, default=25, help="Batch Size for Training")
+    parser.add_argument("--lr", type=float, default=0.0001, help="Learning Rate")
+    parser.add_argument("--steps_per_epoch", type=int, default=100, help="Number of Steps per Epoch")
+    parser.add_argument(
+        "--train_mixed_precision",
+        action="store_true",
+        help="Enable Mixed Precision Training",
+    )
+    parser.add_argument("--num_gpus", type=int, default=1, help="Number of GPUs to use")
+
+    # other parameters
+    parser.add_argument("--output_path", type=str, required=True, help="Path to save the model")
+
+    args = parser.parse_args()
+
+    seq_len = args.seq_len
+
+    bucket_limits = torch.linspace(0.0, 1.0, args.num_borders).to(default_device)
+    criterion = bar_distribution.BarDistribution(bucket_limits)
+
+    single_eval_pos_gen = utils.get_weighted_single_eval_pos_sampler(
+        max_len=seq_len,
+        min_len=0,
+        p=args.power_single_eval_pos_sampler,
+    )
+
+    configs_train = {
+        "nlayers": args.nlayers,
+        "emsize": args.emsize,
+        "epochs": args.epochs,
+        "lr": args.lr,
+        "nhead": args.nhead,
+        "bptt": seq_len,
+        "steps_per_epoch": args.steps_per_epoch,
+        "train_mixed_precision": args.train_mixed_precision,
+        "batch_size": args.batch_size,
+    }
+    configs_train["bptt"] = seq_len
+    configs_train["nhid"] = args.emsize * 2
+    configs_train["warmup_epochs"] = args.epochs // 4
+    configs_train.update(
+        dict(
+            priordataloader_class=get_batch_to_dataloader(ftpfn_prior.get_batch),
+            criterion=criterion,
+            encoder_generator=ftpfn_prior.get_encoder(seq_len),
+            y_encoder_generator=encoders.get_normalized_uniform_encoder(
+                encoders.Linear
+            ),
+            extra_prior_kwargs_dict={
+                "num_features": args.num_features,
+            },
+            single_eval_pos_gen=single_eval_pos_gen,
+            style_encoder_generator=None
+        )
+    )
+
+    total_loss, total_positional_losses, model, dl = train(
+        **configs_train
+    )
+    print(f"Total loss: {total_loss}, Total positional losses: {total_positional_losses}")
+    torch.save(
+        model,
+        args.output_path,
+    )
+    print(f"Model saved to {args.output_path}")
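
After applying the patch, the training entry point added above ends with `torch.save(model, args.output_path)`, so the checkpoint on disk is the full model object rather than a plain state dict. As a quick sanity check it can be reloaded with standard PyTorch; a minimal sketch, assuming the example `--output_path` from the README (`your_own_ifbo.model`) and a PyTorch version whose `torch.load` accepts the `weights_only` argument:

```python
# Minimal sketch: reload the model object written by `python -m ifbo.train ...`.
# "your_own_ifbo.model" is the example --output_path from the README; substitute your own path.
import torch

model = torch.load("your_own_ifbo.model", map_location="cpu", weights_only=False)
model.eval()  # inference mode for the loaded surrogate
print(type(model).__name__)
```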