<a href="https://colab.research.google.com/github/ever-oli/MLby22/blob/main/SentimentAnalysisBERT.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
# BERT FINE-TUNING FOR SENTIMENT ANALYSIS


# Uncomment the pip line below and run this cell to install the required Hugging Face ecosystem and emoji libraries
# !pip install transformers datasets evaluate torch emoji -q

import torch
import emoji
import numpy as np
import evaluate
from datasets import load_dataset
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    TrainingArguments,
    Trainer,
    DataCollatorWithPadding
)

# Select the compute device: the GPU when PyTorch can see one, else the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Executing on device: {device}\n")


# 1. Data Loading (Tweet Sentiment)

print("Loading tweet_eval sentiment dataset from Hugging Face...")
# Label ids in this dataset: 0 = Negative, 1 = Neutral, 2 = Positive
dataset = load_dataset("tweet_eval", "sentiment")

# Keep the Colab demo short (training under 10 minutes) by working on small
# subsets. The fixed seed makes the shuffled selection reproducible.
shuffled_train = dataset["train"].shuffle(seed=42)
small_train_dataset = shuffled_train.select(range(2000))
shuffled_validation = dataset["validation"].shuffle(seed=42)
small_eval_dataset = shuffled_validation.select(range(500))

print(f"Training subset: {len(small_train_dataset)} rows")
print(f"Validation subset: {len(small_eval_dataset)} rows\n")


# 2. Text Pre-processing (Handling Emojis)

print("Applying text pre-processing (Demojization)...")

def preprocess_text(example):
    """Rewrite the emojis in ``example['text']`` as English text aliases.

    Turning an emoji into text (a thumbs up becomes ":thumbs_up:") gives the
    tokenizer something it can read, so the sentiment the emoji carries is
    not lost.

    The record is modified in place and the same object is returned.
    """
    demojized = emoji.demojize(example['text'], language='en')
    example['text'] = demojized
    return example

# Demojize both subsets: the training split first, then the validation split.
small_train_dataset, small_eval_dataset = (
    split.map(preprocess_text)
    for split in (small_train_dataset, small_eval_dataset)
)


# 3. Tokenization

print("Loading BERT tokenizer...")
# The same checkpoint name is used for the tokenizer and for the model.
model_checkpoint = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)

def tokenize_function(examples):
    """Tokenize the ``"text"`` field of ``examples`` with the BERT tokenizer.

    Inputs longer than 128 tokens are truncated. No padding is applied here;
    batches are padded dynamically later on.
    """
    encoded = tokenizer(
        examples["text"],
        max_length=128,
        truncation=True,
    )
    return encoded

print("Tokenizing datasets...")
# Tokenize in batches: the training split first, then the validation split.
tokenized_train, tokenized_eval = (
    split.map(tokenize_function, batched=True)
    for split in (small_train_dataset, small_eval_dataset)
)

# Padding is deferred to collation time (dynamic, per-batch padding), which
# speeds up training compared with padding every example up front.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)


# 4. Model Initialization

print("\nInitializing pre-trained BERT model...")
# The dataset has three classes (Negative, Neutral, Positive). Registering
# their names in the model config means every saved checkpoint carries
# readable labels instead of the generic LABEL_0..LABEL_2 defaults.
id2label = {0: "Negative", 1: "Neutral", 2: "Positive"}
label2id = {name: idx for idx, name in id2label.items()}

model = AutoModelForSequenceClassification.from_pretrained(
    model_checkpoint,
    num_labels=len(id2label),  # size of the new classification head
    id2label=id2label,
    label2id=label2id,
)
model.to(device)


# 5. Training Setup (Hugging Face Trainer)

# Accuracy metric, loaded once and reused for every evaluation pass
metric = evaluate.load("accuracy")

def compute_metrics(eval_pred):
    """Score one evaluation pass with the accuracy metric.

    ``eval_pred`` unpacks into ``(logits, labels)``. The predicted class for
    each row is the index of the largest logit along the last axis. The
    return value is whatever ``metric.compute`` produces for those
    predictions against the reference labels.
    """
    scores, references = eval_pred
    predicted_classes = np.argmax(scores, axis=-1)
    return metric.compute(references=references, predictions=predicted_classes)

# Define training hyperparameters
training_args = TrainingArguments(
    output_dir="./bert-sentiment-results",  # checkpoints are written here
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=3,
    weight_decay=0.01,
    # Evaluate and checkpoint once per epoch. The two strategies must match
    # for load_best_model_at_end to work.
    eval_strategy="epoch",
    save_strategy="epoch",
    # No metric_for_best_model is given, so the "best" checkpoint is the one
    # with the lowest evaluation loss.
    load_best_model_at_end=True,
    logging_steps=50,
    # No external loggers (WandB/TensorBoard) for this demo. The deprecated
    # `logging_dir` argument only served those loggers, so it is dropped.
    report_to="none",
)

# Wire the model, data, collator and metric callback into the Trainer.
trainer = Trainer(
    args=training_args,
    model=model,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_eval,
)


# 6. Model Training & Evaluation

print("\nCommencing Fine-Tuning Process...")
trainer.train()

print("\nEvaluating the best model on the validation set...")
eval_results = trainer.evaluate()
# The metric is reported as a fraction; display it as a percentage.
validation_accuracy = eval_results['eval_accuracy']
print(f"Final Validation Accuracy: {validation_accuracy * 100:.2f}%")


# 7. Inference Example

print("\nTesting the model with custom text:")
test_sentences = [
    "I absolutely love the new design, it works perfectly! :fire:",
    "This was a terrible waste of my time, the product arrived broken.",
    "It is okay, nothing special but it gets the job done."
]

# Class index -> readable name. Built once, not on every loop iteration.
labels_map = {0: "Negative", 1: "Neutral", 2: "Positive"}

# Switch to evaluation mode and skip gradient tracking for inference.
model.eval()
with torch.no_grad():
    for text in test_sentences:
        # Apply the same demojization used on the training data
        processed_text = emoji.demojize(text, language='en')
        # Send the inputs to wherever the model actually lives. The Trainer
        # may have placed it on a device other than the module-level `device`
        # (e.g. Apple MPS when CUDA is unavailable), and mismatched devices
        # raise at forward time.
        inputs = tokenizer(
            processed_text,
            return_tensors="pt",
            truncation=True,
            max_length=128,
        ).to(model.device)
        outputs = model(**inputs)

        # Softmax turns logits into probabilities; the arg-max is the class
        probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
        prediction = torch.argmax(probs, dim=-1).item()

        print(f"Text: '{text}' --> Prediction: {labels_map[prediction]}")

Executing on device: cuda

Loading tweet_eval sentiment dataset from Hugging Face...
Training subset: 2000 rows
Validation subset: 500 rows

Applying text pre-processing (Demojization)...


Map:   0%|          | 0/2000 [00:00<?, ? examples/s]

Map:   0%|          | 0/500 [00:00<?, ? examples/s]

Loading BERT tokenizer...
Tokenizing datasets...


Map:   0%|          | 0/2000 [00:00<?, ? examples/s]

Map:   0%|          | 0/500 [00:00<?, ? examples/s]


Initializing pre-trained BERT model...


Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

BertForSequenceClassification LOAD REPORT from: bert-base-uncased
Key                                        | Status     | 
-------------------------------------------+------------+-
cls.predictions.transform.LayerNorm.bias   | UNEXPECTED | 
cls.seq_relationship.weight                | UNEXPECTED | 
cls.predictions.bias                       | UNEXPECTED | 
cls.seq_relationship.bias                  | UNEXPECTED | 
cls.predictions.transform.LayerNorm.weight | UNEXPECTED | 
cls.predictions.transform.dense.bias       | UNEXPECTED | 
cls.predictions.transform.dense.weight     | UNEXPECTED | 
classifier.bias                            | MISSING    | 
classifier.weight                          | MISSING    | 

Notes:
- UNEXPECTED	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.
- MISSING	:those params were newly initialized because missing from the checkpoint. Consider training on your downstream task.
`logging_dir` is deprecated and will 


Commencing Fine-Tuning Process...


Epoch,Training Loss,Validation Loss,Accuracy
1,0.954925,0.843295,0.622
2,0.641048,0.761813,0.668
3,0.471104,0.796537,0.652


Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

There were missing keys in the checkpoint model loaded: ['bert.embeddings.LayerNorm.weight', 'bert.embeddings.LayerNorm.bias', 'bert.encoder.layer.0.attention.output.LayerNorm.weight', 'bert.encoder.layer.0.attention.output.LayerNorm.bias', 'bert.encoder.layer.0.output.LayerNorm.weight', 'bert.encoder.layer.0.output.LayerNorm.bias', 'bert.encoder.layer.1.attention.output.LayerNorm.weight', 'bert.encoder.layer.1.attention.output.LayerNorm.bias', 'bert.encoder.layer.1.output.LayerNorm.weight', 'bert.encoder.layer.1.output.LayerNorm.bias', 'bert.encoder.layer.2.attention.output.LayerNorm.weight', 'bert.encoder.layer.2.attention.output.LayerNorm.bias', 'bert.encoder.layer.2.output.LayerNorm.weight', 'bert.encoder.layer.2.output.LayerNorm.bias', 'bert.encoder.layer.3.attention.output.LayerNorm.weight', 'bert.encoder.layer.3.attention.output.LayerNorm.bias', 'bert.encoder.layer.3.output.LayerNorm.weight', 'bert.encoder.layer.3.output.LayerNorm.bias', 'bert.encoder.layer.4.attention.output.La


Evaluating the best model on the validation set...


Final Validation Accuracy: 66.80%

Testing the model with custom text:
Text: 'I absolutely love the new design, it works perfectly! :fire:' --> Prediction: Positive
Text: 'This was a terrible waste of my time, the product arrived broken.' --> Prediction: Negative
Text: 'It is okay, nothing special but it gets the job done.' --> Prediction: Positive
