# Hyperparameter Optimization with Optuna and Transformers

_Authored by: [Parag Ekbote](https://github.com/ParagEkbote)_

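In this notebook, we are going to use [Optuna](https://optuna.org/) to run a hyperparameter search (learning rate and per-device training batch size) for a DistilBERT sentiment classifier trained with the Transformers `Trainer` on a small subset of the IMDB dataset.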



!pip install -q datasets evaluate transformers

In [None]:
from datasets import load_dataset
import evaluate

from transformers import AutoModelForSequenceClassification
from transformers import AutoTokenizer
from transformers import set_seed
from transformers import Trainer
from transformers import TrainingArguments


# Fix the random seed through transformers' `set_seed` before anything stochastic runs.
set_seed(42)


# Work on small subsets of IMDB to keep the search quick: shuffle each split
# with a fixed seed, then keep the first 2500 (train) / 1000 (test) rows.
train_dataset = (
    load_dataset("imdb", split="train")
    .shuffle(seed=42)
    .select(range(2500))
)
valid_dataset = (
    load_dataset("imdb", split="test")
    .shuffle(seed=42)
    .select(range(1000))
)

# The same checkpoint name is used for the tokenizer here and for the model
# created in `model_init`.
model_name = "lvwerra/distilbert-imdb"
tokenizer = AutoTokenizer.from_pretrained(model_name)


def tokenize(batch):
    """Tokenize the ``"text"`` entries of a batch with the module-level tokenizer.

    Every sequence is truncated and padded to exactly 512 tokens
    (``padding="max_length"``, ``max_length=512``).

    Args:
        batch: Mapping with a ``"text"`` key; used with ``Dataset.map(batched=True)``.

    Returns:
        Whatever the tokenizer call returns for the given texts.
    """
    texts = batch["text"]
    return tokenizer(texts, padding="max_length", truncation=True, max_length=512)


# Tokenize both splits identically and keep only the three listed columns.
# `split` is local to the generator expression, so no extra name leaks into
# the notebook namespace.
tokenized_train, tokenized_valid = (
    split.map(tokenize, batched=True).select_columns(["input_ids", "attention_mask", "label"])
    for split in (train_dataset, valid_dataset)
)


# Accuracy metric object; `compute_metrics` calls its `compute` method.
metric = evaluate.load("accuracy")


def model_init():
    """Load a fresh two-label sequence-classification model from ``model_name``.

    Passed to ``Trainer`` as ``model_init`` so the trainer can build the model
    itself instead of receiving a pre-built instance.

    Returns:
        The model returned by ``AutoModelForSequenceClassification.from_pretrained``.
    """
    model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)
    return model


## Set the Metrics and Define the Model

In [None]:
def compute_metrics(eval_pred):
    """Score an evaluation pass with the module-level accuracy metric.

    Args:
        eval_pred: Object exposing ``predictions`` (scores whose last axis is
            arg-maxed to get the predicted class) and ``label_ids``.

    Returns:
        The result of ``metric.compute`` for the predicted classes against
        the reference labels.
    """
    predicted_classes = eval_pred.predictions.argmax(axis=-1)
    return metric.compute(predictions=predicted_classes, references=eval_pred.label_ids)


def compute_objective(metrics):
    """Pick the value the hyperparameter search optimizes.

    Args:
        metrics: Mapping of evaluation metrics; must contain ``"eval_accuracy"``.

    Returns:
        The value stored under ``"eval_accuracy"``.

    Raises:
        KeyError: If ``"eval_accuracy"`` is missing from ``metrics``.
    """
    accuracy = metrics["eval_accuracy"]
    return accuracy


# Base training configuration used for the hyperparameter search.
training_args = TrainingArguments(
    # Set explicitly (to the library's default directory name) so this cell also
    # runs on transformers releases where `output_dir` is a required argument,
    # and so the checkpoint location is visible to the reader.
    output_dir="trainer_output",
    eval_strategy="epoch",
    save_strategy="best",
    load_best_model_at_end=True,
    # Rank checkpoints by the same metric the search maximizes. Left unset with
    # `load_best_model_at_end=True`, the Trainer would fall back to the
    # evaluation loss, which disagrees with the accuracy-based objective.
    metric_for_best_model="accuracy",
    greater_is_better=True,
    logging_strategy="epoch",
    report_to="none",
)


# `model_init` (rather than `model`) lets the Trainer construct the model itself,
# which `hyperparameter_search` requires.
trainer = Trainer(
    model_init=model_init,
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_valid,
    processing_class=tokenizer,
    compute_metrics=compute_metrics,
)

## Define the Search Space and Start the Trials

In [None]:
def optuna_hp_space(trial):
    """Sample one hyperparameter configuration from the search space.

    Args:
        trial: Object providing ``suggest_float`` and ``suggest_categorical``
            (an Optuna trial when used with the ``"optuna"`` backend).

    Returns:
        Dict with ``"learning_rate"`` (log-scale float between 1e-6 and 1e-4)
        and ``"per_device_train_batch_size"`` (one of 16, 32, 64, 128).
    """
    learning_rate = trial.suggest_float("learning_rate", 1e-6, 1e-4, log=True)
    batch_size = trial.suggest_categorical("per_device_train_batch_size", [16, 32, 64, 128])
    return {
        "learning_rate": learning_rate,
        "per_device_train_batch_size": batch_size,
    }


# Run the search: five Optuna trials, each sampling from `optuna_hp_space` and
# scored by `compute_objective`, with higher scores treated as better.
best_run = trainer.hyperparameter_search(
    hp_space=optuna_hp_space,
    compute_objective=compute_objective,
    backend="optuna",
    direction="maximize",
    n_trials=5,
)

# Show what the search returned.
print(best_run)