# Model 3: Transformer Fine-Tuning

Transformer-based models leverage contextual embeddings learned from large-scale corpora, enabling superior semantic understanding compared to traditional neural architectures.

Steps:
1. Load the raw text and apply light cleaning (lowercasing, whitespace normalization)
2. Tokenization with the pretrained tokenizer (producing token IDs and attention masks)
3. Model loading and fine-tuning
4. Evaluation on test set
5. Save metrics

## 1. Load raw text

In [1]:
from pathlib import Path
import re

import numpy as np
import pandas as pd

# Dataset location, given relative to the notebook's working directory.
DATA_DIR = Path("../data")

# Both splits are headerless, semicolon-delimited files with a text field and
# an emotion field, so they are read with identical options (train first).
train_df, test_df = (
    pd.read_csv(DATA_DIR / split_file, sep=";", header=None, names=["text", "emotion"])
    for split_file in ("train.txt", "test.txt")
)


def clean_text(text: str) -> str:
    """Return ``text`` lowercased, trimmed, and with whitespace runs collapsed.

    Args:
        text: The raw input string.

    Returns:
        The lowercased string with leading/trailing whitespace removed and
        every run of whitespace characters replaced by a single space.
    """
    trimmed = text.lower().strip()
    return re.sub(r"\s+", " ", trimmed)


# Store the normalised text in a new column on each split; the raw "text"
# column is left exactly as loaded.
train_df["clean_text"], test_df["clean_text"] = (
    split_df["text"].apply(clean_text) for split_df in (train_df, test_df)
)

print(f"Train shape: {train_df.shape}")
print(f"Test shape: {test_df.shape}")

# Preview the first rows of the training split.
train_df.head()

Train shape: (16000, 3)
Test shape: (2000, 3)


Unnamed: 0,text,emotion,clean_text
0,i didnt feel humiliated,sadness,i didnt feel humiliated
1,i can go from feeling so hopeless to so damned...,sadness,i can go from feeling so hopeless to so damned...
2,im grabbing a minute to post i feel greedy wrong,anger,im grabbing a minute to post i feel greedy wrong
3,i am ever feeling nostalgic about the fireplac...,love,i am ever feeling nostalgic about the fireplac...
4,i am feeling grouchy,anger,i am feeling grouchy


## 2. Tokenization with pretrained tokenizer

In [2]:
from datasets import Dataset
from transformers import AutoTokenizer

# Checkpoint to fine-tune and the per-example token budget used when tokenizing.
model_name = "bert-base-uncased"
max_length = 128

# Label ids follow the sorted order of the emotion names found in the training split.
label_names = sorted(train_df["emotion"].unique())
id_to_label = dict(enumerate(label_names))
label_to_id = {name: idx for idx, name in id_to_label.items()}

# Encode the string labels as integer ids on both splits.
train_df["label"], test_df["label"] = (
    split_df["emotion"].map(label_to_id) for split_df in (train_df, test_df)
)

# Only the cleaned text and the integer label are carried into the datasets.
train_dataset, test_dataset = (
    Dataset.from_pandas(split_df[["clean_text", "label"]])
    for split_df in (train_df, test_df)
)

tokenizer = AutoTokenizer.from_pretrained(model_name)


def tokenize_batch(batch):
    """Tokenize a batch of cleaned texts with the pretrained tokenizer.

    Sequences are truncated to ``max_length`` tokens but deliberately left
    unpadded. The ``DataCollatorWithPadding`` given to the ``Trainer`` pads
    each mini-batch to the length of its own longest sequence, so padding
    everything to ``max_length`` here would only add padding tokens the model
    has to process for no benefit.

    Args:
        batch: Mapping with a ``"clean_text"`` entry holding the texts to
            encode, as supplied by ``Dataset.map(..., batched=True)``.

    Returns:
        The tokenizer's encoding of the batch; its fields become new dataset
        columns when used through ``Dataset.map``.
    """
    return tokenizer(
        batch["clean_text"],
        truncation=True,
        max_length=max_length,
    )

# Tokenize each split in batches, then drop the raw-text column, which is no
# longer needed once the encoded columns exist.
train_dataset, test_dataset = (
    split.map(tokenize_batch, batched=True).remove_columns(["clean_text"])
    for split in (train_dataset, test_dataset)
)

# Have both datasets return torch tensors when indexed.
test_dataset.set_format("torch")
train_dataset.set_format("torch")

print("Labels:", label_names)

  from .autonotebook import tqdm as notebook_tqdm
Map: 100%|██████████| 16000/16000 [00:00<00:00, 26426.15 examples/s]
Map: 100%|██████████| 2000/2000 [00:00<00:00, 26500.94 examples/s]

Labels: ['anger', 'fear', 'joy', 'love', 'sadness', 'surprise']





## 3. Load model and fine-tune

In [5]:
import numpy as np
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from transformers import AutoModelForSequenceClassification, DataCollatorWithPadding, TrainingArguments, Trainer

# Seed the RNGs *before* building the model. The classification head is not in
# the pretrained checkpoint (the load report lists classifier.* as MISSING), so
# it is randomly initialised inside `from_pretrained`, which runs before the
# Trainer is created and can apply any seed of its own.
from transformers import set_seed

SEED = 42  # NOTE(review): meant to mirror TrainingArguments' default seed; confirm
set_seed(SEED)

# Pretrained encoder plus a fresh classification head with one output per
# emotion; the id/label maps are stored in the model config.
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=len(label_names),
    id2label=id_to_label,
    label2id=label_to_id,
)


def compute_metrics(pred):
    """Compute accuracy and macro-averaged precision/recall/F1 for the Trainer.

    Args:
        pred: Prediction object exposing ``predictions`` (per-class scores,
            reduced with an argmax over axis 1) and ``label_ids`` (the
            reference class ids).

    Returns:
        A dict with the keys ``"accuracy"``, ``"precision"``, ``"recall"``
        and ``"f1"``.
    """
    y_true = pred.label_ids
    y_pred = np.argmax(pred.predictions, axis=1)

    # Macro averaging; zero_division=0 scores a class with no predicted or
    # true samples as 0 instead of emitting a warning.
    macro_precision, macro_recall, macro_f1, _ = precision_recall_fscore_support(
        y_true, y_pred, average="macro", zero_division=0
    )

    scores = {"accuracy": accuracy_score(y_true, y_pred)}
    scores["precision"] = macro_precision
    scores["recall"] = macro_recall
    scores["f1"] = macro_f1
    return scores

# Collator that pads each mini-batch with the tokenizer at collation time.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

training_args = TrainingArguments(
    # Checkpointing and evaluation both happen once per epoch.
    output_dir="../results/transformer_checkpoints",
    save_strategy="epoch",
    eval_strategy="epoch",
    # After training, reload the checkpoint with the best "f1" as reported by
    # compute_metrics.
    load_best_model_at_end=True,
    metric_for_best_model="f1",
    # Optimisation settings.
    num_train_epochs=2,
    learning_rate=2e-5,
    weight_decay=0.01,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    logging_steps=50,
)

# NOTE(review): eval_dataset is the test split, and it also drives
# best-checkpoint selection (load_best_model_at_end + metric_for_best_model),
# so the final test metrics may be optimistically biased. Consider holding out
# a validation split from the training data for selection.
trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
)

trainer

Loading weights: 100%|██████████| 199/199 [00:00<00:00, 1926.04it/s, Materializing param=bert.pooler.dense.weight]                               
[1mBertForSequenceClassification LOAD REPORT[0m from: bert-base-uncased
Key                                        | Status     | 
-------------------------------------------+------------+-
cls.predictions.bias                       | UNEXPECTED | 
cls.predictions.transform.dense.weight     | UNEXPECTED | 
cls.seq_relationship.weight                | UNEXPECTED | 
cls.predictions.transform.dense.bias       | UNEXPECTED | 
cls.predictions.transform.LayerNorm.weight | UNEXPECTED | 
cls.predictions.transform.LayerNorm.bias   | UNEXPECTED | 
cls.seq_relationship.bias                  | UNEXPECTED | 
classifier.bias                            | MISSING    | 
classifier.weight                          | MISSING    | 

[3mNotes:
- UNEXPECTED[3m	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.
- 

<transformers.trainer.Trainer at 0x2e9c5c2d0>

## 4. Train and evaluate

In [6]:
# Fine-tune the model; the value returned by `train()` is kept in
# `train_result` so it can be inspected afterwards.
train_result = trainer.train()

# Score the fine-tuned model on the held-out test split. The dataset is passed
# explicitly so the reported numbers stay pinned to the test set even if the
# Trainer is later configured with a different `eval_dataset`.
metrics = trainer.evaluate(eval_dataset=test_dataset)

print("Eval metrics:", metrics)

  super().__init__(loader)


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.21145,0.194616,0.924,0.870656,0.898997,0.883132
2,0.120364,0.173911,0.9265,0.879138,0.887172,0.882896


Writing model shards: 100%|██████████| 1/1 [00:02<00:00,  2.46s/it]
  super().__init__(loader)
Writing model shards: 100%|██████████| 1/1 [00:00<00:00,  1.27it/s]
There were missing keys in the checkpoint model loaded: ['bert.embeddings.LayerNorm.weight', 'bert.embeddings.LayerNorm.bias', 'bert.encoder.layer.0.attention.output.LayerNorm.weight', 'bert.encoder.layer.0.attention.output.LayerNorm.bias', 'bert.encoder.layer.0.output.LayerNorm.weight', 'bert.encoder.layer.0.output.LayerNorm.bias', 'bert.encoder.layer.1.attention.output.LayerNorm.weight', 'bert.encoder.layer.1.attention.output.LayerNorm.bias', 'bert.encoder.layer.1.output.LayerNorm.weight', 'bert.encoder.layer.1.output.LayerNorm.bias', 'bert.encoder.layer.2.attention.output.LayerNorm.weight', 'bert.encoder.layer.2.attention.output.LayerNorm.bias', 'bert.encoder.layer.2.output.LayerNorm.weight', 'bert.encoder.layer.2.output.LayerNorm.bias', 'bert.encoder.layer.3.attention.output.LayerNorm.weight', 'bert.encoder.layer.3.attent

Eval metrics: {'eval_loss': 0.19446279108524323, 'eval_accuracy': 0.924, 'eval_precision': 0.8706561556537277, 'eval_recall': 0.8989970616864502, 'eval_f1': 0.8831323516390204, 'eval_runtime': 73.0269, 'eval_samples_per_second': 27.387, 'eval_steps_per_second': 1.712, 'epoch': 2.0}


## 5. Save metrics

In [7]:
from datetime import datetime

# Results file that receives one appended row per run of this cell.
results_dir = Path("../results")
results_dir.mkdir(parents=True, exist_ok=True)
metrics_path = results_dir / "metrics.csv"

# The evaluation metrics carry an "eval_" prefix; values are rounded to four
# decimals for the summary table.
row = {
    "model": "TRANSFORMER",
    "accuracy": round(metrics.get("eval_accuracy", 0.0), 4),
    "precision": round(metrics.get("eval_precision", 0.0), 4),
    "recall": round(metrics.get("eval_recall", 0.0), 4),
    "f1": round(metrics.get("eval_f1", 0.0), 4),
    "timestamp": datetime.now().isoformat(timespec="seconds"),
}

metrics_df = pd.DataFrame([row])

# Always append, writing the header only when the file is new or empty.
# Checking existence alone would leave a pre-created empty file without a
# header row.
needs_header = not metrics_path.exists() or metrics_path.stat().st_size == 0
metrics_df.to_csv(metrics_path, mode="a", header=needs_header, index=False)

print(f"Saved metrics to: {metrics_path}")
metrics_df

Saved metrics to: ../results/metrics.csv


Unnamed: 0,model,accuracy,precision,recall,f1,timestamp
0,TRANSFORMER,0.924,0.8707,0.899,0.8831,2026-02-08T14:55:17
