In [1]:
import pandas as pd

# Read only the first 4000 rows of the CSV; its first column becomes the index.
data = pd.read_csv("emotion.csv", index_col=0, nrows=4000)
# `text` is the raw model input, `label` the classification target.
bert_data, y = data['text'], data['label']

In [3]:
from transformers import TrainingArguments, Trainer
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from datasets import Dataset

# Encoder that maps the raw labels to integer class ids; kept at module level
# so the fitted classes stay available after the split.
le = LabelEncoder()
# Prepare data (raw text + label encoded)
df_bert = pd.DataFrame({'text': bert_data, 'label': le.fit_transform(y)})
# Stratified 80/20 split. The fixed random_state makes the split (and hence the
# scores of every hyperparameter trial) reproducible across kernel restarts.
train_df, test_df = train_test_split(
    df_bert,
    test_size=0.2,
    stratify=df_bert['label'],
    random_state=42,
)

# HuggingFace Dataset format
# Convert both pandas splits in one pass (train first, then test).
train_dataset, test_dataset = (
    Dataset.from_pandas(split_df) for split_df in (train_df, test_df)
)

# Tokenize
# Load the tokenizer for the 'bert-base-uncased' checkpoint, the same checkpoint
# name that model_init() passes to BertForSequenceClassification.from_pretrained.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
def tokenize(batch):
    """Tokenize the ``text`` entries of ``batch`` with the module-level ``tokenizer``.

    Args:
        batch: Mapping with a ``'text'`` key, as supplied by
            ``Dataset.map(..., batched=True)``.

    Returns:
        Whatever the tokenizer returns for the batch, requested with
        truncation enabled and padding to ``max_length=128``.
    """
    encode_options = {'truncation': True, 'padding': 'max_length', 'max_length': 128}
    return tokenizer(batch['text'], **encode_options)
# Apply `tokenize` to both splits in batched mode (train first, then test).
train_dataset, test_dataset = (
    split.map(tokenize, batched=True) for split in (train_dataset, test_dataset)
)

# Format for PyTorch
# Both splits get the same format: torch output restricted to the model inputs
# and the label column.
for formatted_split in (train_dataset, test_dataset):
    formatted_split.set_format('torch', columns=['input_ids', 'attention_mask', 'label'])

# Custom objective function (maximize validation accuracy)
def compute_objective(metrics):
    """Return the value the hyperparameter search optimizes.

    Args:
        metrics: Mapping of evaluation metrics; must contain ``"eval_accuracy"``.

    Returns:
        The ``"eval_accuracy"`` entry of ``metrics``.

    Raises:
        KeyError: If ``"eval_accuracy"`` is missing from ``metrics``.
    """
    accuracy = metrics["eval_accuracy"]
    return accuracy

# Hyperparameter space
def hp_space(trial):
    """Sample one hyperparameter configuration from ``trial``.

    Args:
        trial: Object exposing ``suggest_float``, ``suggest_int`` and
            ``suggest_categorical``.

    Returns:
        Dict with keys ``learning_rate`` (log-sampled between 2e-5 and 5e-5),
        ``num_train_epochs`` (integer between 2 and 5) and
        ``per_device_train_batch_size`` (one of 8, 16, 32).
    """
    learning_rate = trial.suggest_float("learning_rate", 2e-5, 5e-5, log=True)
    epochs = trial.suggest_int("num_train_epochs", 2, 5)
    batch_size = trial.suggest_categorical("per_device_train_batch_size", [8, 16, 32])
    return {
        "learning_rate": learning_rate,
        "num_train_epochs": epochs,
        "per_device_train_batch_size": batch_size,
    }

# Redefine Trainer with placeholder args
# Base arguments for the Trainer used by the search below; learning rate, epoch
# count and batch size are left at their defaults here because hp_space supplies
# them per trial.
training_args = TrainingArguments(
    disable_tqdm=True,                # no progress bars in the cell output
    save_strategy="no",               # do not write checkpoints
    logging_dir="./bert_hp_logs",
    output_dir="./bert_hp_output",
)

def model_init():
    """Build a fresh ``bert-base-uncased`` sequence classifier.

    Handed to ``Trainer(model_init=...)`` so that a new model can be created
    for each run instead of reusing trained weights. The function must remain
    zero-argument for that use.

    The size of the classification head is taken from the fitted module-level
    label encoder ``le`` rather than being hard-coded, so it always matches
    the number of distinct labels actually present in the loaded data.

    Returns:
        The model returned by ``BertForSequenceClassification.from_pretrained``.
    """
    return BertForSequenceClassification.from_pretrained(
        "bert-base-uncased",
        num_labels=len(le.classes_),
    )

def _accuracy_metric(eval_pred):
    """Return the accuracy of the argmax predictions under the key ``eval_accuracy``.

    ``eval_pred`` must expose ``predictions`` and ``label_ids``; the predicted
    class is the argmax over the last axis of ``predictions``.
    """
    predicted_ids = eval_pred.predictions.argmax(axis=-1)
    return {"eval_accuracy": (predicted_ids == eval_pred.label_ids).mean()}


# The model is supplied through `model_init` (not `model`) so the trainer can
# build a new one itself.
# NOTE(review): newer transformers releases may warn that `tokenizer=` is
# deprecated in favour of `processing_class=` — confirm against the installed
# version before switching.
trainer = Trainer(
    model_init=model_init,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    tokenizer=tokenizer,
    compute_metrics=_accuracy_metric,
)

# Start tuning
# Run 5 trials and keep the configuration with the highest objective value.
# hp_space is written against the Optuna trial API (trial.suggest_*), so the
# backend is pinned explicitly instead of relying on auto-detection of whichever
# search library happens to be installed.
# NOTE(review): the trainer's eval_dataset is the test split, so the chosen
# hyperparameters are tuned on test data — consider a separate validation split
# before reporting a final test score.
best_trial = trainer.hyperparameter_search(
    direction="maximize",
    backend="optuna",
    hp_space=hp_space,
    compute_objective=compute_objective,
    n_trials=5,
)

print("Best BERT hyperparameters:", best_trial.hyperparameters)


Map: 100%|██████████| 3200/3200 [00:00<00:00, 3214.48 examples/s]
Map: 100%|██████████| 800/800 [00:00<00:00, 3292.18 examples/s]
  trainer = Trainer(
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
[I 2025-06-24 11:37:50,585] A new study created in memory with name: no-name-a7e69f5f-e135-488b-99d7-44c71a9f4698
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


{'loss': 0.4745, 'grad_norm': 4.742188453674316, 'learning_rate': 7.981384027594306e-06, 'epoch': 2.5}
{'train_runtime': 6906.7265, 'train_samples_per_second': 1.39, 'train_steps_per_second': 0.087, 'train_loss': 0.41117128054300944, 'epoch': 3.0}
{'eval_accuracy': 0.91125, 'eval_loss': 0.30465707182884216, 'eval_runtime': 177.7326, 'eval_samples_per_second': 4.501, 'eval_steps_per_second': 0.563, 'epoch': 3.0}


[I 2025-06-24 13:35:57,069] Trial 0 finished with value: 0.91125 and parameters: {'learning_rate': 4.741416254016419e-05, 'num_train_epochs': 3, 'per_device_train_batch_size': 16}. Best is trial 0 with value: 0.91125.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


{'loss': 0.4698, 'grad_norm': 4.130002021789551, 'learning_rate': 5.132429074196233e-08, 'epoch': 5.0}
{'train_runtime': 10456.0731, 'train_samples_per_second': 1.53, 'train_steps_per_second': 0.048, 'train_loss': 0.46978387451171877, 'epoch': 5.0}


[I 2025-06-24 16:33:15,981] Trial 1 finished with value: 0.9025 and parameters: {'learning_rate': 2.5662145370981165e-05, 'num_train_epochs': 5, 'per_device_train_batch_size': 32}. Best is trial 0 with value: 0.91125.


{'eval_accuracy': 0.9025, 'eval_loss': 0.32960519194602966, 'eval_runtime': 161.7014, 'eval_samples_per_second': 4.947, 'eval_steps_per_second': 0.618, 'epoch': 5.0}


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


{'loss': 0.8405, 'grad_norm': 9.125992774963379, 'learning_rate': 1.3622959336384311e-05, 'epoch': 1.25}
{'loss': 0.2147, 'grad_norm': 5.580007553100586, 'learning_rate': 3.90615524481205e-06, 'epoch': 2.5}
{'train_runtime': 6671.6288, 'train_samples_per_second': 1.439, 'train_steps_per_second': 0.18, 'train_loss': 0.46282588799794516, 'epoch': 3.0}
{'eval_accuracy': 0.91125, 'eval_loss': 0.3528992831707001, 'eval_runtime': 183.1384, 'eval_samples_per_second': 4.368, 'eval_steps_per_second': 0.546, 'epoch': 3.0}


[I 2025-06-24 18:27:33,041] Trial 2 finished with value: 0.91125 and parameters: {'learning_rate': 2.3320329819773432e-05, 'num_train_epochs': 3, 'per_device_train_batch_size': 8}. Best is trial 0 with value: 0.91125.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


{'loss': 0.8513, 'grad_norm': 15.55484676361084, 'learning_rate': 8.710598081453367e-06, 'epoch': 1.25}
{'train_runtime': 4972.9471, 'train_samples_per_second': 1.287, 'train_steps_per_second': 0.161, 'train_loss': 0.6436433601379394, 'epoch': 2.0}
{'eval_accuracy': 0.88, 'eval_loss': 0.37967589497566223, 'eval_runtime': 167.0422, 'eval_samples_per_second': 4.789, 'eval_steps_per_second': 0.599, 'epoch': 2.0}


[I 2025-06-24 19:53:15,774] Trial 3 finished with value: 0.88 and parameters: {'learning_rate': 2.315109124638769e-05, 'num_train_epochs': 2, 'per_device_train_batch_size': 8}. Best is trial 0 with value: 0.91125.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


{'loss': 0.5425, 'grad_norm': 8.587068557739258, 'learning_rate': 1.4855525661837378e-05, 'epoch': 2.5}
{'loss': 0.0828, 'grad_norm': 2.076876401901245, 'learning_rate': 2.965174782801872e-08, 'epoch': 5.0}
{'train_runtime': 10987.6537, 'train_samples_per_second': 1.456, 'train_steps_per_second': 0.091, 'train_loss': 0.31265400314331054, 'epoch': 5.0}
{'eval_accuracy': 0.9225, 'eval_loss': 0.32162874937057495, 'eval_runtime': 173.2008, 'eval_samples_per_second': 4.619, 'eval_steps_per_second': 0.577, 'epoch': 5.0}


[I 2025-06-24 22:59:20,020] Trial 4 finished with value: 0.9225 and parameters: {'learning_rate': 2.965174782801872e-05, 'num_train_epochs': 5, 'per_device_train_batch_size': 16}. Best is trial 4 with value: 0.9225.


Best BERT hyperparameters: {'learning_rate': 2.965174782801872e-05, 'num_train_epochs': 5, 'per_device_train_batch_size': 16}
