<a href="https://colab.research.google.com/github/jmgb27/distilBERT-finetuned-sentiment-analysis/blob/main/distilBERT_finetuning_sentiment_analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [302]:
!pip install transformers torch



In [303]:
from transformers import DistilBertTokenizerFast, DistilBertForSequenceClassification
from torch.utils.data import DataLoader, TensorDataset, random_split
import torch.nn as nn
import torch
from sklearn.metrics import accuracy_score
import numpy as np
import pandas as pd

In [304]:
data = pd.read_csv('/content/sst2_train.csv')

In [305]:
# Load the BERT tokenizer
tokenizer = DistilBertTokenizerFast.from_pretrained('distilbert-base-uncased')

In [306]:
# Tokenize the data
tokens = tokenizer.batch_encode_plus(
    data['sentence'].to_list(),
    padding=True,
    truncation=True,
    return_attention_mask=True,
    max_length=128
)

In [308]:
# Create TensorDatasets
dataset = TensorDataset(
    torch.tensor(tokens['input_ids']),
    torch.tensor(tokens['attention_mask']),
    torch.tensor(data['label'])
)

In [309]:
# Split into training and validation sets
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

In [310]:
# Create DataLoaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32)

In [311]:
# Load BERT model
model = DistilBertForSequenceClassification.from_pretrained('distilbert-base-uncased')

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.bias', 'classifier.bias', 'pre_classifier.weight', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [312]:
# optimizer
optimizer = torch.optim.AdamW(model.parameters(), lr=2e-5, weight_decay=0.01) # 0.01 is a common value for weight decay


In [313]:
#loss function
loss_function = nn.CrossEntropyLoss()

In [314]:
# Check if a GPU is available and if not, use a CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Move the model to the device
model = model.to(device)

# Training loop
epochs = 1

for epoch in range(epochs):
    model.train()
    total_loss = 0
    total_correct = 0
    total_examples = 0

    for batch in train_loader:
        optimizer.zero_grad()
        input_ids, attention_mask, labels = batch
        input_ids = input_ids.to(device)
        attention_mask = attention_mask.to(device)
        labels = labels.long().to(device)

        outputs = model(input_ids, attention_mask=attention_mask)
        loss = loss_function(outputs.logits, labels)
        loss.backward()
        optimizer.step()

        total_loss += loss.item() * len(labels)
        predictions = torch.argmax(outputs.logits, dim=1)  # Get the index of the max log-probability
        total_correct += (predictions == labels).sum().item()
        total_examples += len(labels)

    # Print the average loss and accuracy for the epoch
    avg_loss = total_loss / total_examples
    accuracy = total_correct / total_examples
    print(f"Epoch {epoch + 1}: Loss = {avg_loss:.4f}, Accuracy = {accuracy * 100:.2f}%")


Epoch 1: Loss = 0.2315, Accuracy = 90.62%


In [315]:
def predict_sentiment(text):
    # Put the model in evaluation mode
    model.eval()

    # Tokenize the input text
    inputs = tokenizer(
        text,
        padding=True,
        truncation=True,
        max_length=128,
        return_tensors="pt"
    )

    # Move the inputs to the device
    inputs = {key: tensor.to(device) for key, tensor in inputs.items()}

    # Get the model's prediction
    with torch.no_grad():
        outputs = model(**inputs)

    # The output logits are raw scores for the classes
    logits = outputs.logits
    # Use softmax to compute probabilities
    probs = torch.softmax(logits, dim=1)
    print(probs)
    # Get the class with the highest probability
    prediction = torch.argmax(probs, dim=1).item()

    # Convert the prediction to a human-readable label
    if prediction == 1:
        sentiment = "Positive"
    else:
        sentiment = "Negative"

    return sentiment


In [316]:
text = "todays weather is bad!"
sentiment = predict_sentiment(text)
print(f"The sentiment for the text is: {sentiment}")

tensor([[0.9926, 0.0074]], device='cuda:0')
The sentiment for the text is: Negative
