In [1]:
!pip install transformers optuna

[0m

In [2]:
from datasets import load_dataset
from transformers import AutoModelForSequenceClassification, AutoTokenizer, TrainingArguments, Trainer

# 데이터셋 로드
dataset = load_dataset("glue", "mrpc")

# 토크나이저 및 모델 로드
model_name = "distilbert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)

# 데이터셋 전처리
def preprocess_function(examples):
    return tokenizer(examples["sentence1"], examples["sentence2"], truncation=True)

encoded_dataset = dataset.map(preprocess_function, batched=True)

Downloading readme:   0%|          | 0.00/35.3k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/649k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/75.7k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/308k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/3668 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/408 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/1725 [00:00<?, ? examples/s]



tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_layer_norm.bias', 'vocab_transform.weight', 'vocab_layer_norm.weight', 'vocab_projector.bias', 'vocab_transform.bias']
- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.bias', 'pre_classifier.weight', 'classifier.weight', 'classifier.

Map:   0%|          | 0/3668 [00:00<?, ? examples/s]

Map:   0%|          | 0/408 [00:00<?, ? examples/s]

Map:   0%|          | 0/1725 [00:00<?, ? examples/s]

In [3]:
import optuna
from transformers import TrainerCallback

# Optuna를 사용하여 하이퍼파라미터 튜닝 함수 정의
def model_init():
    return AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)

def objective(trial):
    # 하이퍼파라미터 범위 정의
    learning_rate = trial.suggest_loguniform('learning_rate', 1e-5, 5e-5)
    per_device_train_batch_size = trial.suggest_categorical('per_device_train_batch_size', [8, 16, 32])
    num_train_epochs = trial.suggest_int('num_train_epochs', 2, 5)

    # 훈련 인자 설정
    training_args = TrainingArguments(
        output_dir="./results",
        learning_rate=learning_rate,
        per_device_train_batch_size=per_device_train_batch_size,
        num_train_epochs=num_train_epochs,
        evaluation_strategy="epoch",
        save_strategy="epoch",
        disable_tqdm=False,
        logging_dir="./logs",
        logging_steps=10,
        load_best_model_at_end=True,
    )

    # Trainer 정의
    trainer = Trainer(
        model_init=model_init,
        args=training_args,
        train_dataset=encoded_dataset["train"],
        eval_dataset=encoded_dataset["validation"],
        tokenizer=tokenizer,
    )

    # 모델 학습 및 평가
    trainer.train()
    eval_result = trainer.evaluate()

    # 평가 결과의 지표를 반환 (여기서는 정확도를 사용)
    return eval_result['eval_accuracy']

# Optuna 스터디 생성 및 최적화 실행
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=10)

# 최적의 하이퍼파라미터 출력
print("Best hyperparameters:", study.best_params)

[I 2024-09-11 10:56:11,043] A new study created in memory with name: no-name-e4438130-741b-4857-9d17-c8ec7bc33e42
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-5, 5e-5)
Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_layer_norm.bias', 'vocab_transform.weight', 'vocab_layer_norm.weight', 'vocab_projector.bias', 'vocab_transform.bias']
- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a mo

You're using a DistilBertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Epoch,Training Loss,Validation Loss
1,0.4518,0.393252
2,0.4173,0.364704
3,0.3833,0.544787
4,0.3834,0.585741


[W 2024-09-11 10:57:04,495] Trial 0 failed with parameters: {'learning_rate': 1.8238108477646022e-05, 'per_device_train_batch_size': 8, 'num_train_epochs': 4} because of the following error: KeyError('eval_accuracy').
Traceback (most recent call last):
  File "/opt/conda/lib/python3.10/site-packages/optuna/study/_optimize.py", line 197, in _run_trial
    value_or_values = func(trial)
  File "/tmp/ipykernel_829746/3291160273.py", line 42, in objective
    return eval_result['eval_accuracy']
KeyError: 'eval_accuracy'
[W 2024-09-11 10:57:04,496] Trial 0 failed with value None.


KeyError: 'eval_accuracy'

In [None]:
best_hyperparameters = study.best_params

training_args = TrainingArguments(
    output_dir="./results",
    learning_rate=best_hyperparameters['learning_rate'],
    per_device_train_batch_size=best_hyperparameters['per_device_train_batch_size'],
    num_train_epochs=best_hyperparameters['num_train_epochs'],
    evaluation_strategy="epoch",
    save_strategy="epoch",
    disable_tqdm=False,
    logging_dir="./logs",
    logging_steps=10,
    load_best_model_at_end=True,
)

trainer = Trainer(
    model_init=model_init,
    args=training_args,
    train_dataset=encoded_dataset["train"],
    eval_dataset=encoded_dataset["validation"],
    tokenizer=tokenizer,
)

trainer.train()