# How to Build and Train a Transformer Model from Scratch with Hugging Face Transformers

Based on the KDnuggets tutorial: https://www.kdnuggets.com/how-to-build-and-train-a-transformer-model-from-scratch-with-hugging-face-transformers

#### Load the emotions dataset

In [1]:
from datasets import load_dataset
# Fetch the emotions dataset; downloaded files are cached under the local "tmp" directory.
dataset = load_dataset(
  "jeffnyman/emotions",
  cache_dir="tmp",
)

In [2]:
# Peek at the first five test rows: the raw sentences first, then their integer labels.
for column in ("text", "label"):
  print(dataset["test"][column][:5])

['im feeling rather rotten so im not very ambitious right now', 'im updating my blog because i feel shitty', 'i never make her separate from me because i don t ever want her to feel like i m ashamed with her', 'i left with my bouquet of red and yellow tulips under my arm feeling slightly more optimistic than when i arrived', 'i was feeling a little vain when i did this one']
[0, 0, 0, 1, 0]


#### Tokenise the sentences

In [3]:
from transformers import AutoTokenizer

def tokenize_function(examples):
  """Tokenize a batch of rows for the model.

  Reads the ``'text'`` column of ``examples`` and passes it to the
  notebook-level ``tokenizer`` with ``padding="max_length"`` and
  ``truncation=True``, so every sequence comes back at one fixed length.
  Returns whatever the tokenizer returns.
  """
  batch_texts = examples['text']
  encoded = tokenizer(batch_texts, truncation=True, padding="max_length")
  return encoded

# Only the tokenizer (vocabulary + text preprocessing) comes from the
# bert-base-uncased checkpoint; the model itself is built from a config later on.
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

# batched=True hands tokenize_function a batch of rows per call instead of a single row.
tokenized_datasets = dataset.map(
  tokenize_function,
  batched=True,
)



Map:   0%|          | 0/2000 [00:00<?, ? examples/s]

In [4]:
# Distinct label ids that occur in the training split; reused later to size the classifier head.
unique_labels = {label for label in tokenized_datasets['train']['label']}
print(f"Unique labels in the training set: {unique_labels}")

def check_labels(dataset, valid_labels=None):
  """Report labels in any split that fall outside the expected label set.

  Args:
    dataset: Mapping of split name to a split whose ``'label'`` column is
      iterable (e.g. ``tokenized_datasets``).
    valid_labels: Optional collection of accepted labels. When omitted, the
      notebook-level ``unique_labels`` (labels seen in the training split)
      is used.

  Prints one ``Found invalid label: ...`` line per offending label and
  returns nothing.
  """
  allowed = unique_labels if valid_labels is None else valid_labels
  # Scan every split: checking only 'train' against a label set derived from
  # 'train' can never fail, so labels unique to the eval splits went unnoticed.
  for split_name in dataset:
    for label in dataset[split_name]['label']:
      if label not in allowed:
        print(f"Found invalid label: {label}")

# Sanity-check the labels of the tokenized dataset; this prints only if an unexpected label is found.
check_labels(dataset=tokenized_datasets)

Unique labels in the training set: {0, 1, 2, 3, 4, 5}


#### Set up the transformer

In [5]:
from transformers import BertConfig
from transformers import BertForSequenceClassification

from transformers import set_seed

# Seed the Python / NumPy / PyTorch RNGs *before* the model is built. The weights
# below are randomly initialised, and Trainer only applies its own seed once it is
# constructed (after the model already exists), so without this call the starting
# weights differ on every run. 42 matches the default TrainingArguments seed.
set_seed(42)

# A reduced BERT encoder described purely by a config: no pretrained weights are loaded.
config = BertConfig(
  vocab_size=tokenizer.vocab_size,  # must agree with the tokenizer that produces input_ids
  hidden_size=512,
  num_hidden_layers=6,
  num_attention_heads=8,            # 512 hidden dims / 8 heads = 64 dims per head
  intermediate_size=2048,
  max_position_embeddings=512,
  num_labels=len(unique_labels),    # one output logit per label seen in the training split
)

# Building from a config (rather than from_pretrained) gives randomly initialised weights.
model = BertForSequenceClassification(config)

#### Set up training arguments

In [6]:
from transformers import TrainingArguments, Trainer

import dataclasses

# `evaluation_strategy` was renamed to `eval_strategy` in newer transformers releases
# (the old name is deprecated and no longer accepted by recent versions). Pick whichever
# name the installed TrainingArguments dataclass actually declares so this cell runs on
# both old and new versions.
_training_arg_names = {field.name for field in dataclasses.fields(TrainingArguments)}
_eval_strategy_kwarg = (
  "eval_strategy" if "eval_strategy" in _training_arg_names else "evaluation_strategy"
)

training_args = TrainingArguments(
  output_dir='./results',             # training artefacts are written under this directory
  learning_rate=2e-5,
  per_device_train_batch_size=16,
  per_device_eval_batch_size=16,
  num_train_epochs=3,
  weight_decay=0.01,
  **{_eval_strategy_kwarg: "epoch"},  # run evaluation at the end of every epoch
)

# Wire the model, hyperparameters and data splits together.
# No data collator or tokenizer is passed, so batching relies on the fixed-length
# padding that tokenize_function already applied to every example.
trainer = Trainer(
  model=model,
  args=training_args,
  train_dataset=tokenized_datasets["train"],
  eval_dataset=tokenized_datasets["test"],  # the test split doubles as the evaluation set
)

# NOTE: the former trailing `DataLoaderConfiguration(dispatch_batches=None)` assignment was
# dropped. The name is never imported in this notebook (NameError on a fresh kernel) and its
# result was never handed to Trainer, so it had no effect on training.


In [7]:
# Run the training loop configured above; the returned TrainOutput is shown as the cell result.
trainer.train()

  0%|          | 0/3000 [00:00<?, ?it/s]

{'loss': 1.5832, 'learning_rate': 1.6666666666666667e-05, 'epoch': 0.5}
{'loss': 1.4797, 'learning_rate': 1.3333333333333333e-05, 'epoch': 1.0}


  0%|          | 0/125 [00:00<?, ?it/s]

{'eval_loss': 1.2578352689743042, 'eval_runtime': 14.8019, 'eval_samples_per_second': 135.118, 'eval_steps_per_second': 8.445, 'epoch': 1.0}
{'loss': 1.2188, 'learning_rate': 1e-05, 'epoch': 1.5}
{'loss': 1.0562, 'learning_rate': 6.666666666666667e-06, 'epoch': 2.0}


  0%|          | 0/125 [00:00<?, ?it/s]

{'eval_loss': 0.9186496734619141, 'eval_runtime': 14.7876, 'eval_samples_per_second': 135.248, 'eval_steps_per_second': 8.453, 'epoch': 2.0}
{'loss': 0.8412, 'learning_rate': 3.3333333333333333e-06, 'epoch': 2.5}
{'loss': 0.7339, 'learning_rate': 0.0, 'epoch': 3.0}


  0%|          | 0/125 [00:00<?, ?it/s]

{'eval_loss': 0.6735637187957764, 'eval_runtime': 14.8439, 'eval_samples_per_second': 134.736, 'eval_steps_per_second': 8.421, 'epoch': 3.0}
{'train_runtime': 1043.0841, 'train_samples_per_second': 46.017, 'train_steps_per_second': 2.876, 'train_loss': 1.1521451110839844, 'epoch': 3.0}


TrainOutput(global_step=3000, training_loss=1.1521451110839844, metrics={'train_runtime': 1043.0841, 'train_samples_per_second': 46.017, 'train_steps_per_second': 2.876, 'train_loss': 1.1521451110839844, 'epoch': 3.0})

In [8]:
# Persist the trained model to the local "emotions_model" directory.
# Only the model is saved here; the tokenizer is not written alongside it.
model.save_pretrained("emotions_model")

#### Test the model

In [18]:
import torch

In [21]:
# Check if GPU is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Move the model to the correct device
model.to(device)

# Put the model in evaluation mode so dropout is disabled and predictions are
# deterministic, instead of relying on whatever train/eval state the Trainer
# happened to leave the model in.
model.eval()

# A handful of hand-written sentences used as a quick smoke test of the classifier
texts = [
    "I'm so happy today!",
    "This is the worst day of my life.",
    "I'm feeling quite neutral about everything.",
    "I'm extremely excited about the new project!",
]

# Tokenize the whole batch as PyTorch tensors (padding=True, truncation=True) and
# place every resulting tensor on the same device as the model in one pass.
inputs = {
    name: tensor.to(device)
    for name, tensor in tokenizer(
        texts, padding=True, truncation=True, return_tensors="pt"
    ).items()
}

# Forward pass only: gradient tracking is switched off for the whole prediction step.
with torch.no_grad():
    outputs = model(**inputs)
    # Raw prediction scores; the class axis is dim 1.
    logits = outputs.logits
    # Index of the highest-scoring class for each input text.
    predicted_classes = logits.argmax(dim=1)

# Label id -> emotion name, in the dataset's own id order
# (0=sadness, 1=joy, 2=love, 3=anger, 4=fear, 5=surprise, per the dataset card).
# The earlier ordering put 'anger' at id 0, which contradicts the test samples printed
# near the top of the notebook: clearly sad sentences ("feeling rather rotten") carry label 0.
emotion_labels = ['sadness', 'joy', 'love', 'anger', 'fear', 'surprise']

# Map the predicted classes to their corresponding emotion labels
# (.tolist() turns the tensor of class ids into plain Python ints for list indexing).
predicted_emotions = [emotion_labels[class_id] for class_id in predicted_classes.tolist()]

# Print the predictions
for text, emotion in zip(texts, predicted_emotions):
    print(f"Text: {text} => Predicted Emotion: {emotion}")

Text: I'm so happy today! => Predicted Emotion: joy
Text: This is the worst day of my life. => Predicted Emotion: joy
Text: I'm feeling quite neutral about everything. => Predicted Emotion: joy
Text: I'm extremely excited about the new project! => Predicted Emotion: joy
