# Hyperparameter Search Examples with 🤗 Transformers
This notebook demonstrates how to use various hyperparameter optimization backends (Optuna, Ray Tune, SigOpt, W&B) with 🤗 Transformers' `Trainer`.

## Setup
Before running the examples below, make sure to install all required packages:
```bash
pip install transformers optuna "ray[tune]" hyperopt sigopt wandb datasets
```

## Data & Model Initialization

In [None]:
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer

# Tokenizer for the same DistilBERT checkpoint that model_init() loads.
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")

# Only the first 200 training rows of GLUE SST-2 are used, to keep every trial short.
dataset = load_dataset("glue", "sst2", split="train[:200]")

def preprocess(examples):
    """Tokenize the "sentence" field of a batch of examples.

    Args:
        examples: Mapping with a "sentence" entry (a batch when used with
            ``Dataset.map(..., batched=True)``).

    Returns:
        Whatever the module-level ``tokenizer`` returns for those sentences,
        truncated and padded with ``padding="max_length"``.
    """
    sentences = examples["sentence"]
    return tokenizer(sentences, truncation=True, padding="max_length")

# Tokenize every example, then hold out 20% of the rows for evaluation.
# The fixed seed keeps the train/test split identical across kernel restarts,
# so objective values from different runs of the notebook are comparable.
dataset = dataset.map(preprocess, batched=True).train_test_split(test_size=0.2, seed=42)

def model_init():
    """Build a fresh two-label DistilBERT sequence classifier.

    Passed to ``Trainer(model_init=...)`` so that a new model can be
    instantiated from the pretrained checkpoint whenever the Trainer needs one.
    """
    checkpoint = "distilbert-base-uncased"
    return AutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=2)


## Common Objective Function

In [None]:
# Single-objective search: the value to minimize is the evaluation loss.
def compute_objective(metrics):
    """Pick the scalar objective out of an evaluation metrics dict.

    Args:
        metrics: Mapping of metric names to values; must contain "eval_loss".

    Returns:
        The value stored under "eval_loss".

    Raises:
        KeyError: If "eval_loss" is missing from ``metrics``.
    """
    objective = metrics["eval_loss"]
    return objective


## 1. Optuna Example

In [None]:
import optuna
# EarlyStoppingCallback is exported by the top-level `transformers` package,
# not by `transformers.integrations` (which holds the third-party logger/HPO hooks).
from transformers import EarlyStoppingCallback

def optuna_hp_space(trial):
    """Sample one hyperparameter configuration from an Optuna trial.

    Args:
        trial: Object exposing ``suggest_float``, ``suggest_int`` and
            ``suggest_categorical`` (an Optuna trial).

    Returns:
        Dict keyed by ``TrainingArguments`` field name with the sampled values.
    """
    # Learning rate is sampled on a log scale; the other ranges are linear.
    learning_rate = trial.suggest_float("learning_rate", 1e-6, 1e-4, log=True)
    weight_decay = trial.suggest_float("weight_decay", 0.0, 0.3)
    epochs = trial.suggest_int("num_train_epochs", 1, 3)
    batch_size = trial.suggest_categorical("per_device_train_batch_size", [16, 32, 64])
    warmup = trial.suggest_int("warmup_steps", 0, 100)

    return dict(
        learning_rate=learning_rate,
        weight_decay=weight_decay,
        num_train_epochs=epochs,
        per_device_train_batch_size=batch_size,
        warmup_steps=warmup,
    )

# EarlyStoppingCallback watches `metric_for_best_model`, which requires
# load_best_model_at_end=True and a save strategy that matches the evaluation
# strategy. save_total_limit keeps the per-trial checkpoints from piling up.
training_args = TrainingArguments(
    "optuna-hpo",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
    greater_is_better=False,
    save_total_limit=1,
    logging_steps=10,
)

# No compute_metrics here: the Trainer already reports `eval_loss` on every
# evaluation, and the EvalPrediction passed to compute_metrics has no `loss`
# attribute to read it from.
trainer = Trainer(
    args=training_args,
    train_dataset=dataset["train"],
    eval_dataset=dataset["test"],
    tokenizer=tokenizer,
    model_init=model_init,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=1)],
)

# Run 5 Optuna trials, minimizing the value returned by compute_objective (eval_loss).
best_trial = trainer.hyperparameter_search(
    direction="minimize",
    backend="optuna",
    hp_space=optuna_hp_space,
    n_trials=5,
    compute_objective=compute_objective,
)

print("Best Optuna trial:", best_trial)


## 2. Ray Tune Example

In [None]:
from ray import tune
from ray.tune.schedulers import ASHAScheduler
from ray.tune.search.hyperopt import HyperOptSearch

# Search space expressed with Ray Tune sampling primitives.
ray_search_space = {
    "learning_rate": tune.loguniform(1e-5, 1e-3),
    "per_device_train_batch_size": tune.choice([16, 32, 64]),
    "num_train_epochs": tune.choice([2, 3, 4]),
}


def ray_hp_space(trial_config):
    """Return the Ray Tune search space for the Trainer's Ray backend.

    The Ray backend calls ``hp_space`` to obtain the ``config`` handed to
    ``tune.run``, so this must return sampling objects rather than read
    sampled values out of its argument.

    Args:
        trial_config: Unused; accepted only to satisfy the ``hp_space``
            callback signature (the backend may pass ``None``).

    Returns:
        A new dict mapping ``TrainingArguments`` field names to Ray Tune
        sampling objects (a shallow copy of ``ray_search_space``).
    """
    return dict(ray_search_space)

training_args = TrainingArguments("ray-hpo", evaluation_strategy="epoch", logging_steps=10)


def compute_accuracy(eval_pred):
    """Compute classification accuracy for one evaluation pass.

    Args:
        eval_pred: Object with ``predictions`` (per-class scores, argmax taken
            over the last axis) and ``label_ids`` (reference labels).

    Returns:
        Dict with a single "eval_accuracy" entry. ``eval_loss`` is not returned
        here: the Trainer reports it itself, and ``eval_pred`` has no ``loss``
        attribute to read.
    """
    predicted_labels = eval_pred.predictions.argmax(-1)
    return {"eval_accuracy": (predicted_labels == eval_pred.label_ids).mean()}


trainer = Trainer(
    args=training_args,
    train_dataset=dataset["train"],
    eval_dataset=dataset["test"],
    tokenizer=tokenizer,
    model_init=model_init,
    compute_metrics=compute_accuracy,
)

# Maximize accuracy over 5 CPU-only trials; the search algorithm and the
# scheduler are both told which reported metric to follow.
best_run = trainer.hyperparameter_search(
    direction="maximize",
    backend="ray",
    hp_space=ray_hp_space,
    n_trials=5,
    search_alg=HyperOptSearch(metric="eval_accuracy", mode="max"),
    scheduler=ASHAScheduler(metric="eval_accuracy", mode="max", max_t=3),
    resources_per_trial={"cpu": 1, "gpu": 0},
    compute_objective=lambda metrics: metrics["eval_accuracy"],
)

print("Best Ray Tune run:", best_run)


## 3. SigOpt Example

In [None]:
def sigopt_hp_space(trial):
    """Describe the SigOpt search space as a list of parameter definitions.

    Args:
        trial: Unused; present to match the ``hp_space`` callback signature.

    Returns:
        List of dicts, one per hyperparameter, each carrying a "name", a "type"
        and either "bounds" or "categorical_values" (given as strings).
    """
    learning_rate = {"bounds": {"min": 1e-6, "max": 1e-4}, "name": "learning_rate", "type": "double"}
    weight_decay = {"bounds": {"min": 0.0, "max": 0.3}, "name": "weight_decay", "type": "double"}
    batch_size = {
        "categorical_values": ["16", "32", "64"],
        "name": "per_device_train_batch_size",
        "type": "categorical",
    }
    epochs = {"bounds": {"min": 1, "max": 3}, "name": "num_train_epochs", "type": "int"}

    return [learning_rate, weight_decay, batch_size, epochs]

# Reuses the `trainer` built in the Ray Tune example above.
# The SigOpt backend optimizes one scalar objective, so pass a single direction
# string and a compute_objective that returns a single value (here eval_loss).
best_trials = trainer.hyperparameter_search(
    direction="minimize",
    backend="sigopt",
    hp_space=sigopt_hp_space,
    n_trials=5,
    compute_objective=compute_objective,
)

print("Best SigOpt trials:", best_trials)


## 4. Weights & Biases (W&B) Example

In [None]:
import wandb

def wandb_hp_space(trial):
    """Build the W&B sweep configuration used as the search space.

    Args:
        trial: Unused; present to match the ``hp_space`` callback signature.

    Returns:
        Sweep config dict with the search "method", the "metric" to optimize
        and the "parameters" to sample.
    """
    parameters = {
        "learning_rate": {"distribution": "uniform", "min": 1e-6, "max": 1e-4},
        "per_device_train_batch_size": {"values": [16, 32, 64]},
        "num_train_epochs": {"values": [1, 2, 3]},
    }
    metric = {"name": "eval_loss", "goal": "minimize"}

    sweep_config = {"method": "random"}
    sweep_config["metric"] = metric
    sweep_config["parameters"] = parameters
    return sweep_config

# Random-search sweep of 5 runs on the current `trainer`, scored by the
# eval_loss that compute_objective extracts from each run's metrics.
best_runs = trainer.hyperparameter_search(
    hp_space=wandb_hp_space,
    compute_objective=compute_objective,
    n_trials=5,
    backend="wandb",
    direction="minimize",
)

print("Best W&B runs:", best_runs)


**End of examples.**

You can adjust `n_trials`, early stopping, objective functions, and other settings to suit your specific task.