In [347]:
import torch
from torchtext.datasets import SST2

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")


train_data = SST2(split="train")
eval_data = SST2(split="dev")

In [348]:
from transformers import BertTokenizer

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

sentences = []
labels = []

for text, label in train_data:
    sentences.append(text)
    labels.append(label)

tokenized_texts = [tokenizer.tokenize(sentence) for sentence in sentences]

MAX_LEN = 128
input_ids = [tokenizer.convert_tokens_to_ids(x) for x in tokenized_texts]
input_ids = torch.tensor([ids[:MAX_LEN] + [0] * (MAX_LEN - len(ids)) for ids in input_ids])

attention_masks = torch.tensor([[1 if token_id > 0 else 0 for token_id in ids] for ids in input_ids])

labels = torch.tensor(labels)

len(input_ids)

67349

In [349]:
eval_sentences = []
eval_labels = []

for text, label in eval_data:
    eval_sentences.append(text)
    eval_labels.append(label)

eval_tokenized_texts = [tokenizer.tokenize(sentence) for sentence in eval_sentences]

MAX_LEN = 128
eval_input_ids = [tokenizer.convert_tokens_to_ids(x) for x in eval_tokenized_texts]
eval_input_ids = torch.tensor([ids[:MAX_LEN] + [0] * (MAX_LEN - len(ids)) for ids in eval_input_ids])

eval_attention_masks = torch.tensor([[1 if token_id > 0 else 0 for token_id in ids] for ids in eval_input_ids])
eval_labels = torch.tensor(eval_labels)

len(eval_input_ids)

872

In [341]:
len(eval_labels)


872

In [350]:
from torch.utils.data import TensorDataset, DataLoader

train_dataset = TensorDataset(input_ids, attention_masks, labels)
eval_dataset = TensorDataset(eval_input_ids, eval_attention_masks, eval_labels)

BATCH_SIZE = 16

train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
eval_dataloader = DataLoader(eval_dataset, batch_size=BATCH_SIZE, shuffle=True)


In [387]:
from torch import nn, sigmoid

class TextClassificationModel(nn.Module):
    def __init__(self, vocab_size, embed_dim, hidden_dim, num_classes):
        super(TextClassificationModel, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embed_dim)
        self.lstm = nn.LSTM(embed_dim, hidden_dim)
        self.fc = nn.Linear(hidden_dim, num_classes)

    def forward(self, text):
        embedded = self.embedding(text)
        output, (hidden, cell) = self.lstm(embedded)
        output = self.fc(output[:, -1, :])
        return output.squeeze()


In [392]:
LR = 0.001
num_epochs = 2
EMBED_DIM = 128
HIDDEN_DIM = 128

model = TextClassificationModel(vocab_size, embed_dim=EMBED_DIM, hidden_dim=HIDDEN_DIM, num_classes=1)
model.to(DEVICE)

criterion = nn.BCEWithLogitsLoss().to(DEVICE)
optimizer = torch.optim.SGD(model.parameters(), lr=LR)

In [393]:
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

print(f'The model has {count_parameters(model):,} trainable parameters')



The model has 4,039,041 trainable parameters


In [394]:
output[0]
#len(labels)
len(train_dataloader), len(eval_dataloader)


(4210, 55)

In [395]:
def binary_accuracy(preds, y):
    """
    Returns accuracy per batch, i.e. if you get 8/10 right, this returns 0.8, NOT 8
    """

    #round predictions to the closest integer
    rounded_preds = torch.round(torch.sigmoid(preds))
    correct = (rounded_preds == y).float() #convert into float for division 
    acc = correct.sum() / len(correct)
    return acc

In [396]:
for epoch in range(num_epochs):
    model.train()
    total_loss = 0
    total_acc = 0

    for index, (input_ids, attention_masks, labels) in enumerate(train_dataloader):
        input_ids = input_ids.to(DEVICE)
        attention_masks = attention_masks.to(DEVICE)
        labels = labels.to(DEVICE)
        
        optimizer.zero_grad()

        output = model(input_ids)

        loss = criterion(output.squeeze(), labels.float())

        acc = binary_accuracy(output, labels)

        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        total_acc += acc.item()

        print(f"Epoch {epoch+1}/{num_epochs} | Batch {index}/{len(train_dataloader)} | Loss: {loss.item():.4f} | Accuracy: {acc.item()*100:.2f}%")

print(total_loss / len(train_dataloader), total_acc / len(train_dataloader))

Epoch 1/2 | Batch 0/4210 | Loss: 0.7272 | Accuracy: 31.25%
Epoch 1/2 | Batch 1/4210 | Loss: 0.7464 | Accuracy: 18.75%
Epoch 1/2 | Batch 2/4210 | Loss: 0.7034 | Accuracy: 43.75%
Epoch 1/2 | Batch 3/4210 | Loss: 0.7041 | Accuracy: 43.75%
Epoch 1/2 | Batch 4/4210 | Loss: 0.7305 | Accuracy: 31.25%
Epoch 1/2 | Batch 5/4210 | Loss: 0.7072 | Accuracy: 43.75%
Epoch 1/2 | Batch 6/4210 | Loss: 0.7588 | Accuracy: 12.50%
Epoch 1/2 | Batch 7/4210 | Loss: 0.6908 | Accuracy: 50.00%
Epoch 1/2 | Batch 8/4210 | Loss: 0.7189 | Accuracy: 31.25%
Epoch 1/2 | Batch 9/4210 | Loss: 0.7214 | Accuracy: 31.25%
Epoch 1/2 | Batch 10/4210 | Loss: 0.7037 | Accuracy: 43.75%
Epoch 1/2 | Batch 11/4210 | Loss: 0.7123 | Accuracy: 37.50%
Epoch 1/2 | Batch 12/4210 | Loss: 0.6820 | Accuracy: 56.25%
Epoch 1/2 | Batch 13/4210 | Loss: 0.7006 | Accuracy: 50.00%
Epoch 1/2 | Batch 14/4210 | Loss: 0.7096 | Accuracy: 43.75%
Epoch 1/2 | Batch 15/4210 | Loss: 0.7010 | Accuracy: 43.75%
Epoch 1/2 | Batch 16/4210 | Loss: 0.7360 | Accurac

In [362]:
data_iter = iter(train_dataloader)
input_ids, attention_masks, labels = next(data_iter)

input_ids = input_ids.to(DEVICE)
attention_masks = attention_masks.to(DEVICE)
labels = labels.to(DEVICE)
        
output = model(input_ids, attention_masks)


#optimizer.zero_grad()

#sentiment_scores = output.mean(dim=1)

loss = criterion(output.squeeze(), labels.float())

outputs, loss

(tensor([[-0.8509],
         [-2.6836],
         [ 0.8116],
         [ 1.0097],
         [-1.0161],
         [ 1.1587],
         [ 3.2035],
         [-2.6024],
         [ 0.1453],
         [ 0.3020],
         [ 3.3958],
         [ 2.9689],
         [-1.6010],
         [-2.6337],
         [-2.0616],
         [ 3.3160]], device='cuda:0', grad_fn=<AddmmBackward0>),
 tensor(0.1882, device='cuda:0',
        grad_fn=<BinaryCrossEntropyWithLogitsBackward0>))

In [327]:
model.eval()
corrects = 0
total = 0

with torch.no_grad():
    for input_ids, attention_masks, labels in train_dataloader:

        input_ids = input_ids.to(DEVICE)
        attention_masks = attention_masks.to(DEVICE)
        labels = labels.to(DEVICE)
        
        output = model(input_ids, attention_masks)
        predicted_labels = (output > 0.5).long()

        corrects += (predicted_labels.argmax(1) == labels).sum().item()
        total += len(labels)

accuracy = corrects / total

accuracy

0.44217434557305973