# Fine-tune a BERT classifier for sentiment classification
Example from https://huggingface.co/docs/transformers/training

# Development environment


In [1]:
! pip install transformers[torch]
! pip install datasets
! pip install evaluate
! pip install scikit-learn
! pip install wandb




In [2]:
import warnings
warnings.filterwarnings("ignore")

import time

import evaluate
import numpy as np
import transformers
import wandb
from datasets import load_dataset
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from transformers import TrainingArguments, Trainer


2024-01-17 13:40:13.358437: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-01-17 13:40:13.408949: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


# Login to Weights and Biases


In [3]:
# Authenticate this session with Weights & Biases before any run is started.
wandb.login()


[34m[1mwandb[0m: Currently logged in as: [33moliviamoveon[0m ([33molivia-liu[0m). Use [1m`wandb login --relogin`[0m to force relogin


True

In [4]:
# Start a Weights & Biases run and register the hyperparameters and metadata
# that describe this experiment.
wandb.init(
    # Project under which this run is logged
    project="sutd-mlops-project",
    # Explicit run name (a random one such as sunshine-lollypop-10 is assigned otherwise)
    name="experiment_session3_run_1",
    # Hyperparameters and run metadata tracked with the run
    config={
        "learning_rate": 2e-5,
        "weight_decay": 0.01,
        "num_train_epochs": 10,
        "train_subsample_size": 1000,
        "architecture": "distilbert",
        "dataset_name": "rotten_tomatoes",
        "model_name": "distilbert-base-uncased",
    },
)
# Keep a handle on the configuration object of the active run.
config = wandb.config

Error in callback <bound method _WandbInit._pause_backend of <wandb.sdk.wandb_init._WandbInit object at 0x7f8edda376a0>> (for post_run_cell), with arguments args (<ExecutionResult object at 7f903e8bbf70, execution_count=4 error_before_exec=None error_in_exec=None info=<ExecutionInfo object at 7f8eddbb7df0, raw_cell="wandb.init(
      # Set the project where this run.." store_history=True silent=False shell_futures=True cell_id=3cbdacef-88c6-4163-bf8f-7a91e766bf6d> result=None>,),kwargs {}:


TypeError: _WandbInit._pause_backend() takes 1 positional argument but 2 were given

# Prepare data


In [None]:
# Load the dataset named in the run config so the metadata logged for this
# run always matches the data that is actually used.
dataset = load_dataset(config.dataset_name)
# Show the first training example as a quick sanity check.
dataset["train"][0]

In [None]:
# Take the checkpoint name from the run config so the logged model name and
# the tokenizer/model that are loaded cannot drift apart.
model_name = config.model_name
tokenizer = AutoTokenizer.from_pretrained(model_name)

def tokenize_function(examples):
    """Tokenize the "text" field of a batch of examples.

    Sequences are padded with padding="max_length" and truncated, so every
    example in the batch is encoded to the same length.
    """
    return tokenizer(examples["text"], padding="max_length", truncation=True)

# batched=True passes batches of examples to tokenize_function instead of single rows.
tokenized_datasets = dataset.map(tokenize_function, batched=True)

In [None]:
def _subsample(split, size, seed=42):
    """Return up to `size` examples drawn from a seeded shuffle of `split`.

    The requested size is clamped to the length of the split, so asking for
    more examples than are available returns the whole (shuffled) split
    instead of raising an index error.
    """
    return split.shuffle(seed=seed).select(range(min(size, len(split))))

# The training subsample size comes from the run config so that the value
# logged for the run is the one actually applied.
small_train_dataset = _subsample(tokenized_datasets["train"], config.train_subsample_size)
small_eval_dataset = _subsample(tokenized_datasets["validation"], 100)
small_test_dataset = _subsample(tokenized_datasets["test"], 100)

# Train the model


In [None]:
# Load the pretrained checkpoint with a sequence-classification head;
# num_labels=2 sizes that head for two output classes.
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)

In [None]:
# Load the "accuracy" metric from the evaluate library; compute_metrics uses it.
metric = evaluate.load("accuracy")

In [None]:
def compute_metrics(eval_pred):
    """Score a batch of model outputs with the module-level `metric`.

    Args:
        eval_pred: A pair that unpacks into (logits, labels).

    Returns:
        Whatever `metric.compute` returns for the predicted class ids
        (argmax over the last axis of the logits) against the labels.
    """
    scores, references = eval_pred
    # The predicted class is the index of the highest score along the last axis.
    predicted_classes = np.argmax(scores, axis=-1)
    return metric.compute(predictions=predicted_classes, references=references)

In [None]:
repo_name = "finetuning-sentiment-model"

# Hyperparameters are read from the W&B run config so that the values tracked
# for this run are exactly the ones used for training.
training_args = TrainingArguments(
    output_dir=repo_name,
    report_to="wandb",
    # NOTE(review): newer transformers releases rename this argument to
    # `eval_strategy`; switch if the installed version rejects it.
    # No eval_steps is given, so evaluation is expected to follow logging_steps.
    evaluation_strategy="steps",
    learning_rate=config.learning_rate,
    weight_decay=config.weight_decay,
    num_train_epochs=config.num_train_epochs,
    logging_steps=20,
)

In [None]:
# Bundle the model, training arguments, the subsampled train/validation
# splits and the metric function into a single Trainer.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=small_train_dataset,
    eval_dataset=small_eval_dataset,
    compute_metrics=compute_metrics,
)

In [None]:
# Run fine-tuning with the settings held in training_args.
trainer.train()

# Test the model


In [None]:
# Accuracy on the training subset. The "train" prefix labels the returned
# metrics (e.g. train_accuracy) so they are not confused with the validation
# metrics, which use the default "eval" prefix.
trainer.evaluate(small_train_dataset, metric_key_prefix="train")

In [None]:
# Accuracy on the validation subset (the same split that was passed to the
# Trainer as eval_dataset).
trainer.evaluate(small_eval_dataset)

In [None]:
# Accuracy on the held-out test subset. The "test" prefix labels the returned
# metrics (e.g. test_accuracy) so they are not confused with the validation
# metrics, which use the default "eval" prefix.
trainer.evaluate(small_test_dataset, metric_key_prefix="test")


In [None]:
# Close the Weights & Biases run that was opened with wandb.init().
wandb.finish()


# What to try next

- train and evaluate with the complete training and test dataset instead of a sample
- experiment with different training parameters (number of epochs, optimizers, batch size, learning rate schedule, ...)
- compare DistilBERT vs the full BERT model: https://huggingface.co/bert-base-uncased
- compare the results with the scikit-learn model from the previous notebook. What is the cost-benefit trade-off between deep learning and traditional ML?
- check out this more detailed sentiment analysis tutorial on Hugging Face: https://huggingface.co/blog/sentiment-analysis-python