In [1]:
# import the necessary libraries
import torch
import numpy as np
import pandas as pd
from transformers import BertTokenizer, BertModel
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import train_test_split
import torch.nn.functional as F
import warnings

warnings.filterwarnings("ignore", category=UserWarning, module="torchtext")

In [2]:
# set random seed for reproducibility
SEED = 1234
torch.manual_seed(SEED)
np.random.seed(SEED)
torch.backends.cudnn.deterministic = True

# load pre-trained BERT tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
max_input_length = tokenizer.model_max_length

# load the dataset into a Pandas DataFrame
df = pd.read_csv('Tweets.csv', index_col='tweet_id')

# convert categorical variable into numerical values
df['airline_sentiment'] = df['airline_sentiment'].map({'negative': 0, 'neutral': 1, 'positive': 2})

# define a customer PyTorch dataset class
class TweetDataset(Dataset):
    def __init__(self, dataframe):
        self.data = dataframe.dropna(subset=['text', 'airline_sentiment'])

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        text = self.data.iloc[idx]['text']
        label = self.data.iloc[idx]['airline_sentiment']
        return label, text

# split the dataset into 80% training and 20% testing
train_df, test_df = train_test_split(df, test_size=0.2, random_state=SEED)

# create dataset instances
train_dataset = TweetDataset(train_df)
test_dataset = TweetDataset(test_df)

# optimized batch collation function
def collate_batch(batch):
    labels, texts = zip(*batch)
    labels = torch.tensor(labels, dtype=torch.float)
    texts = tokenizer(list(texts), truncation=True, padding="max_length", max_length=max_input_length, return_tensors="pt")
    return labels.to(device), texts["input_ids"].to(device)

# define batch size and device
BATCH_SIZE = 16
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device, 'activated')
if torch.cuda.is_available():
    print(torch.cuda.get_device_name(0))
print()

# create DataLoaders
train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, collate_fn=collate_batch)
test_dataloader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, collate_fn=collate_batch)

# check dataset shape from one batch
batch = next(iter(test_dataloader))
labels, input_ids = batch
print(labels.shape, input_ids.shape)  # [batch_size] and [batch_size, max_input_length]

cuda activated
NVIDIA GeForce RTX 3060

torch.Size([16]) torch.Size([16, 512])


In [3]:
# load Pre-Trained BERT model
bert = BertModel.from_pretrained('bert-base-uncased')

# freeze BERT parameters for efficiency
for param in bert.parameters():
    param.requires_grad = False

# define BERT + GRU model
class BERTGRUSentiment(nn.Module):
    def __init__(self, bert, hidden_dim, output_dim, n_layers, bidirectional, dropout):
        super().__init__()
        self.bert = bert
        embedding_dim = bert.config.hidden_size
        self.rnn = nn.GRU(embedding_dim, 
                          hidden_dim, 
                          num_layers=n_layers, 
                          bidirectional=bidirectional, 
                          batch_first=True, 
                          dropout=0 if n_layers < 2 else dropout)
        self.out = nn.Linear(hidden_dim * 2 if bidirectional else hidden_dim, output_dim)
        self.dropout = nn.Dropout(dropout)

    def forward(self, text):
        attention_mask = (text != tokenizer.pad_token_id).long()
        with torch.no_grad():
            outputs = self.bert(input_ids=text, attention_mask=attention_mask)
            embedded = outputs.last_hidden_state
        
        _, hidden = self.rnn(embedded)
        if self.rnn.bidirectional:
            hidden = self.dropout(torch.cat((hidden[-2,:,:], hidden[-1,:,:]), dim=1))
        else:
            hidden = self.dropout(hidden[-1,:,:])
        
        return self.out(hidden)

In [4]:
# initialize model
HIDDEN_DIM = 256
OUTPUT_DIM = 3
N_LAYERS = 2
BIDIRECTIONAL = True
DROPOUT = 0.25

model = BERTGRUSentiment(bert, HIDDEN_DIM, OUTPUT_DIM, N_LAYERS, BIDIRECTIONAL, DROPOUT)
model.to(device)

# set optimizer and loss function
optimizer = optim.Adam(model.parameters())
criterion = nn.CrossEntropyLoss().to(device)

# train function with debug prints
def train(model, iterator, optimizer, criterion):
    model.train()
    epoch_loss, epoch_acc = 0, 0
    
    for i, (labels, text) in enumerate(iterator):
        print(f"Processing batch {i+1}/{len(iterator)}")
        optimizer.zero_grad()
        predictions = model(text)
        predictions = predictions.view(-1, 3)
        labels = labels.long()
        loss = criterion(predictions, labels)
        # acc = ((torch.round(torch.sigmoid(predictions)) == labels).float().mean()).item()
        _, predicted_classes = predictions.max(1) 
        acc = (predicted_classes == labels).float().mean().item()
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
        epoch_acc += acc
        if i % 10 == 0:
            print(f"Batch {i}: Loss {loss.item():.4f}, Accuracy {acc*100:.2f}%")

    return epoch_loss / len(iterator), epoch_acc / len(iterator)

# evaluate function
def evaluate(model, iterator, criterion):
    model.eval()
    epoch_loss, epoch_acc = 0, 0
    
    with torch.no_grad():
        for labels, text in iterator:
            # predictions = model(text).squeeze(1)
            predictions = model(text)
            predictions = predictions.view(-1, 3)
            labels = labels.long()
            loss = criterion(predictions, labels)
            # acc = ((torch.round(torch.sigmoid(predictions)) == labels).float().mean()).item()
            _, predicted_classes = predictions.max(1)
            acc = (predicted_classes == labels).float().mean().item()
            epoch_loss += loss.item()
            epoch_acc += acc

    return epoch_loss / len(iterator), epoch_acc / len(iterator)

In [5]:
# training loop
N_EPOCHS = 5
best_valid_loss = float('inf')

for epoch in range(N_EPOCHS):
    train_loss, train_acc = train(model, train_dataloader, optimizer, criterion)
    valid_loss, valid_acc = evaluate(model, test_dataloader, criterion)

    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), "bert_gru_model.pt")

    print(f"Epoch {epoch+1} | Train Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%")
    print(f"Valid Loss: {valid_loss:.3f} | Valid Acc: {valid_acc*100:.2f}%")

# load best model and test
model.load_state_dict(torch.load("bert_gru_model.pt"))
test_loss, test_acc = evaluate(model, test_dataloader, criterion)
print(f"Test Loss: {test_loss:.3f} | Test Acc: {test_acc*100:.2f}%")

Processing batch 1/732
Batch 0: Loss 1.0521, Accuracy 62.50%
Processing batch 2/732
Processing batch 3/732
Processing batch 4/732
Processing batch 5/732
Processing batch 6/732
Processing batch 7/732
Processing batch 8/732
Processing batch 9/732
Processing batch 10/732
Processing batch 11/732
Batch 10: Loss 0.8459, Accuracy 75.00%
Processing batch 12/732
Processing batch 13/732
Processing batch 14/732
Processing batch 15/732
Processing batch 16/732
Processing batch 17/732
Processing batch 18/732
Processing batch 19/732
Processing batch 20/732
Processing batch 21/732
Batch 20: Loss 0.6887, Accuracy 68.75%
Processing batch 22/732
Processing batch 23/732
Processing batch 24/732
Processing batch 25/732
Processing batch 26/732
Processing batch 27/732
Processing batch 28/732
Processing batch 29/732
Processing batch 30/732
Processing batch 31/732
Batch 30: Loss 0.5808, Accuracy 81.25%
Processing batch 32/732
Processing batch 33/732
Processing batch 34/732
Processing batch 35/732
Processing bat

In [6]:
# predict sentiment
def predict_sentiment(model, tokenizer, sentence, max_input_length=512, device='cuda'):
    model.eval() 
    tokens = tokenizer.encode(sentence, truncation=True, padding="max_length", 
                               max_length=max_input_length, return_tensors="pt").to(device)
    with torch.no_grad(): 
        logits = model(tokens)
        print("Logits: ", logits)  
    probs = F.softmax(logits, dim=1) 
    predicted_class = torch.argmax(probs, dim=1).item()
    return predicted_class, probs[0][predicted_class].item()

# test the model
print(predict_sentiment(model, tokenizer, 'This airline is the worst one I have ever been on because they are so rude on the phone calls.'))
print(predict_sentiment(model, tokenizer, 'The airline was decent, not the best, but is much better than some others.'))
print(predict_sentiment(model, tokenizer, 'Amazing airline experience, everyone was super nice and caring!'))

Logits:  tensor([[ 3.9670, -2.0033, -3.1911]], device='cuda:0')
(0, 0.9966789484024048)
Logits:  tensor([[ 0.6888, -1.6282, -0.1643]], device='cuda:0')
(0, 0.6558923721313477)
Logits:  tensor([[-2.7334, -1.0281,  3.7278]], device='cuda:0')
(2, 0.9899387359619141)


In [8]:
# Results:
# 
# Using this BERT with GRU layer model training on the Tweets dataset, the model was able to achieve 
# an approximate 84.02% accuracy while training with five epochs. However, at first, this was not the
# case as I had to spend some time properly loading the dataset via the CSV file and then setting it up 
# through the customer dataset class using a data frame, and then working it into a data loader. After 
# this, I was able to get the very first model version working with an approximate 63% training accuracy. 
# However, through further code analysis, I figured out that the output dimensions for this model specifically 
# should not be 1 as it was for the coding activity, but 3 as there are three sentiment values (e.g., 
# negative, neutral, and positive). With this change and a few adjustments in the training and evaluation 
# functions to fit the shape of the model with new output dimensions better, the training accuracy of the 
# model increased above 80% with even three epochs in training. 

# In addition, I took the opportunity and made a predict_sentiment function similar to the code example 
# with the change for three categories for the sentiment, and tested a few sentences against the model 
# with one sentence each for the three sentiment categories, where negative is 0, neutral is 1, and 
# positive is 2. The results agree with the training accuracy as the first and third sentences strongly 
# stick with the correct category as shown by the high probability/confidence score. What I found interesting 
# is the second sentence, which I wrote to be slightly confusing and on the neutral side purposefully. 
# The model predicted this sentence to be negative, but it had a lower probability/confidence score of about 
# 0.66. There is a chance that my model doesn't effectively categorize the three different sentiment 
# categories, or the predict_sentiment function isn't properly outputting the neutral category value of 1. 
# Also, it is likely that my sentence is poorly worded and is not the best for evaluation. Regardless, 
# these results still show that the model can effectively classify these different sentences for three 
# different sentiment categories. I think that is a win!